Files
lidarr-mb-gap/main.py
Danilo Reyes 0dca7474a9 Refactor find_missing_albums method to return tuples and enhance album processing
- Updated the return type of find_missing_albums to return a tuple of lists: albums_to_add and albums_to_update.
- Improved response handling and debugging output for API responses.
- Enhanced album categorization based on status indicators (red for missing, orange for needing updates).
- Updated main function to process and display albums to add and update separately.
- Modified HTML report generation to reflect changes in album categorization.
2025-11-11 09:51:29 -06:00

602 lines
24 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Script to identify missing albums on MusicBrainz from Deezer releases
for artists monitored in Lidarr, and generate submission links.
This script:
1. Fetches artists from Lidarr with monitorNewItems set to "new" or "all"
2. Uses SAMBL to find albums missing on MusicBrainz from Deezer releases
3. Generates a-tisket/harmony links for submitting albums to MusicBrainz
"""
import requests
import json
import sys
import os
from typing import List, Dict, Optional, Tuple
from urllib.parse import quote
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
class LidarrClient:
    """Client for interacting with Lidarr API.

    Every request is sent with an ``X-Api-Key`` header built from ``api_key``.
    """

    def __init__(self, base_url: str, api_key: str, timeout: float = 30):
        """
        Args:
            base_url: Root URL of the Lidarr instance; a trailing slash is stripped.
            api_key: Lidarr API key, sent as the ``X-Api-Key`` header.
            timeout: Seconds to wait for Lidarr before giving up (default: 30).
        """
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.headers = {'X-Api-Key': api_key}
        self.timeout = timeout

    def get_artists(self) -> List[Dict]:
        """
        Fetch all artists from Lidarr.

        Returns:
            The decoded JSON body of ``GET {base_url}/api/v1/artist``, or an
            empty list if the request fails or the body is not valid JSON.
        """
        url = f"{self.base_url}/api/v1/artist"
        try:
            # Without an explicit timeout requests waits indefinitely on a
            # stalled connection.
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr)
            return []

    def get_monitored_artists(self, monitor_types: Optional[List[str]] = None) -> List[Dict]:
        """
        Get artists with monitorNewItems set to specified values

        Args:
            monitor_types: List of monitorNewItems values to filter by (default: ['new', 'all'])

        Returns:
            List of artist dictionaries matching the criteria
        """
        if monitor_types is None:
            monitor_types = ['new', 'all']
        return [
            artist for artist in self.get_artists()
            if artist.get('monitorNewItems') in monitor_types
        ]
class SamblClient:
    """
    Client for interacting with SAMBL API to find missing albums.

    SAMBL (Streaming Artist MusicBrainz Lookup) is available at:
    - Website: https://sambl.lioncat6.com
    - GitHub: https://github.com/Lioncat6/SAMBL-React
    - API Root: https://sambl.lioncat6.com/api/
    """

    def __init__(self, base_url: Optional[str] = None):
        """
        Args:
            base_url: Root URL of the SAMBL instance; a trailing slash is stripped.
                Falls back to the public instance when None or empty.
        """
        # Default to the public SAMBL instance if no URL provided
        self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip('/')

    def _search_deezer_artist(self, artist_name: str) -> Optional[str]:
        """
        Search for an artist on Deezer and return their Deezer ID.
        Uses Deezer API directly since SAMBL's searchArtists endpoint is unsupported.

        Args:
            artist_name: Name of the artist to search for

        Returns:
            Deezer artist ID as string, or None if not found or the request failed
        """
        try:
            # Use Deezer API to search for the artist
            deezer_search_url = "https://api.deezer.com/search/artist"
            params = {'q': artist_name, 'limit': 1}
            response = requests.get(deezer_search_url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
            return None

        # Only the first hit is used (the search is issued with limit=1).
        results = data.get('data') if isinstance(data, dict) else None
        first = results[0] if isinstance(results, list) and results else None
        if not isinstance(first, dict) or first.get('id') is None:
            print(f" [Sambl] ⚠️ Artist '{artist_name}' not found on Deezer")
            return None

        artist_id = str(first['id'])
        print(f" [Sambl] Found Deezer artist ID: {artist_id}")
        return artist_id

    @staticmethod
    def _sample(item, limit: int = 500) -> str:
        """Return a truncated textual sample of a response item for debug output."""
        if isinstance(item, dict):
            return json.dumps(item, indent=2)[:limit]
        return str(item)[:limit]

    @staticmethod
    def _print_response_debug(data) -> None:
        """Print the shape of the raw SAMBL response (debug output only)."""
        print(" [Sambl] Raw API response structure:")
        print(f" [Sambl] Response type: {type(data)}")
        if isinstance(data, dict):
            print(f" [Sambl] Top-level keys: {list(data.keys())}")
            if 'albumData' in data:
                album_data = data.get('albumData')
                # Guard the debug prints so an unexpected albumData type
                # (e.g. null) cannot abort the actual processing.
                if isinstance(album_data, (list, dict)):
                    print(f" [Sambl] albumData count: {len(album_data)}")
                if isinstance(album_data, list) and album_data:
                    first = album_data[0]
                    print(f" [Sambl] First album keys: {list(first.keys()) if isinstance(first, dict) else 'Not a dict'}")
                    print(f" [Sambl] First album sample: {SamblClient._sample(first)}")
            # Check status counts
            if 'orange' in data:
                print(f" [Sambl] Orange (needs update) albums: {data.get('orange', 0)}")
            if 'green' in data:
                print(f" [Sambl] Green (linked) albums: {data.get('green', 0)}")
            if 'red' in data:
                print(f" [Sambl] Red (missing) albums: {data.get('red', 0)}")
        elif isinstance(data, list):
            print(f" [Sambl] Response is a list with {len(data)} items")
            if data:
                first = data[0]
                print(f" [Sambl] First item keys: {list(first.keys()) if isinstance(first, dict) else 'Not a dict'}")
                print(f" [Sambl] First item sample: {SamblClient._sample(first)}")

    @staticmethod
    def _extract_albums(data) -> List:
        """Locate the list of album entries inside a decoded SAMBL response."""
        albums = []
        if isinstance(data, dict):
            # SAMBL uses 'albumData' as the key for the albums array
            album_data = data.get('albumData')
            print(f" [Sambl] albumData type: {type(album_data)}, value: {album_data}")
            if isinstance(album_data, list):
                albums = album_data
            elif isinstance(album_data, dict):
                # albumData might be a dict with nested structure
                print(f" [Sambl] albumData is dict with keys: {list(album_data.keys()) if album_data else 'None'}")
                nested = album_data.get('albums', album_data.get('data', []))
                albums = nested if isinstance(nested, list) else []
            # Fallback to other possible keys
            if not albums and isinstance(data.get('albums'), list):
                albums = data.get('albums', [])
            if not albums and isinstance(data.get('data'), list):
                albums = data.get('data', [])
        elif isinstance(data, list):
            albums = data
        return albums

    @staticmethod
    def _categorize_albums(albums: List, artist_name: str) -> Tuple[List[Dict], List[Dict]]:
        """Split album entries into (albums_to_add, albums_to_update) by status."""
        albums_to_add = []
        albums_to_update = []
        for idx, album in enumerate(albums):
            if not isinstance(album, dict):
                print(f" [Sambl] Album {idx+1}: ⚠️ Skipping - unexpected entry type {type(album)}")
                continue

            # Get album status and MusicBrainz ID
            album_status = str(album.get('albumStatus', '')).lower()
            musicbrainz_id = album.get('mbid') or album.get('musicbrainz_id') or album.get('musicbrainzId') or ''
            album_title = album.get('name') or album.get('title') or 'Unknown'
            album_issues = album.get('albumIssues', [])

            # Debug: Print album details
            print(f" [Sambl] Album {idx+1}: {album_title}")
            print(f" Status: {album_status or 'N/A'}, MBID: {musicbrainz_id or 'None'}, Issues: {album_issues}")

            # Extract Deezer URL and album info
            deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
            if not deezer_id or deezer_id == 'None':
                print(" ⚠️ Skipping - no valid Deezer ID found")
                continue

            album_data = {
                'title': album_title,
                'deezer_url': f"https://www.deezer.com/album/{deezer_id}",
                'deezer_id': deezer_id,
                'release_date': album.get('releaseDate') or album.get('release_date') or album.get('release') or '',
                'artist_name': artist_name,
                'cover_url': album.get('imageUrl') or album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or '',
            }

            # Categorize albums based on status
            if album_status == 'red' or not musicbrainz_id:
                # Red status or no MBID = needs to be added to MusicBrainz
                albums_to_add.append(album_data)
                print(" ✓ Added to 'to add' list (not in MusicBrainz)")
            elif album_status == 'orange':
                # Orange status = in MusicBrainz but needs linking/updates
                album_data['mbid'] = musicbrainz_id
                # Fall back to a release URL built from the MBID when the
                # response carries no (or an empty) albumMBUrl.
                album_data['mb_url'] = album.get('albumMBUrl') or f'https://musicbrainz.org/release/{musicbrainz_id}'
                album_data['album_issues'] = album_issues
                albums_to_update.append(album_data)
                print(" ✓ Added to 'to update' list (needs linking/updates)")
            else:
                # Any other status with an MBID is treated as properly linked, skip
                print(f" ✓ Album is properly linked (MBID: {musicbrainz_id})")
        return albums_to_add, albums_to_update

    def find_missing_albums(self, artist_mbid: str, artist_name: str) -> Tuple[List[Dict], List[Dict]]:
        """
        Find albums missing on MusicBrainz from Deezer releases for an artist.
        Uses SAMBL's /api/compareArtistAlbums endpoint which compares albums
        from Deezer with MusicBrainz and identifies missing ones.

        Args:
            artist_mbid: MusicBrainz ID of the artist
            artist_name: Name of the artist

        Returns:
            Tuple of (albums_to_add, albums_to_update); both lists are empty
            when the artist is not found on Deezer or the SAMBL call fails.
            - albums_to_add: Albums not in MusicBrainz (red status, no mbid)
            - albums_to_update: Albums in MusicBrainz but need linking/updates (orange status)
            Format:
            [
                {
                    'title': 'Album Title',
                    'deezer_url': 'https://www.deezer.com/album/123456789',
                    'deezer_id': '123456789',
                    'release_date': '2024-01-01',
                    'artist_name': artist_name,
                    'cover_url': 'cover image URL or empty string',
                    'mbid': 'musicbrainz-id' (only for albums_to_update),
                    'mb_url': 'MusicBrainz release URL' (only for albums_to_update),
                    'album_issues': ['issue1', 'issue2'] (only for albums_to_update)
                }
            ]
        """
        print(f" [Sambl] Checking for missing albums for {artist_name} (MBID: {artist_mbid})")

        # First, we need to find the Deezer artist ID
        deezer_artist_id = self._search_deezer_artist(artist_name)
        if not deezer_artist_id:
            # Callers unpack two lists, so always return a pair.
            return [], []

        # Now use SAMBL's compareArtistAlbums endpoint
        response = None
        try:
            api_url = f"{self.base_url}/api/compareArtistAlbums"
            params = {
                'provider_id': deezer_artist_id,
                'provider': 'deezer',
                'mbid': artist_mbid,
                'full': 'true',  # Get full information including missing albums
            }
            response = requests.get(api_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()

            # Debug: Print the raw response structure
            self._print_response_debug(data)

            # Parse the response to extract albums
            # SAMBL returns albums in 'albumData' with status indicators:
            # - 'red': Not in MusicBrainz (need to add)
            # - 'orange': In MusicBrainz but needs linking/updates (need to update)
            # - 'green': Properly linked (skip)
            albums = self._extract_albums(data)
            print(f" [Sambl] Processing {len(albums)} album(s) from response")

            # If we have status counts but no albums, something is wrong
            if isinstance(data, dict) and not albums:
                print(" [Sambl] ⚠️ Warning: Found status counts but no albums in albumData")
                print(f" [Sambl] Full response keys: {list(data.keys())}")
                print(f" [Sambl] Total albums reported: {data.get('total', 'N/A')}")
                # Try to print a sample of the response structure
                print(f" [Sambl] Response sample: {json.dumps(data, indent=2)[:1000]}")

            albums_to_add, albums_to_update = self._categorize_albums(albums, artist_name)
            print(f" [Sambl] ✓ Found {len(albums_to_add)} album(s) to add, {len(albums_to_update)} album(s) to update")
            return albums_to_add, albums_to_update
        except requests.exceptions.RequestException as e:
            print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
            return [], []
        except (KeyError, ValueError, TypeError, AttributeError) as e:
            print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
            print(f" [Sambl] Response: {response.text[:200] if response is not None else 'N/A'}", file=sys.stderr)
            return [], []
class SubmissionLinkGenerator:
    """Builds MusicBrainz submission links (a-tisket and Harmony) for a Deezer URL."""

    @staticmethod
    def generate_atisket_link(deezer_url: str) -> str:
        """Return the a-tisket link carrying the fully percent-encoded Deezer URL."""
        return "https://atisket.pulsewidth.org.uk/?url=" + quote(deezer_url, safe='')

    @staticmethod
    def generate_harmony_link(deezer_url: str) -> str:
        """Return the Harmony link carrying the fully percent-encoded Deezer URL."""
        return "https://harmony.pulsewidth.org.uk/?url=" + quote(deezer_url, safe='')

    @staticmethod
    def generate_links(deezer_url: str) -> Dict[str, str]:
        """Return the original Deezer URL together with both submission links."""
        links = {'deezer_url': deezer_url}
        links['atisket_link'] = SubmissionLinkGenerator.generate_atisket_link(deezer_url)
        links['harmony_link'] = SubmissionLinkGenerator.generate_harmony_link(deezer_url)
        return links
def main():
    """Main execution function.

    Reads LIDARR_URL, LIDARR_API_KEY, SAMBL_URL and MAX_ARTISTS from the
    environment, asks SAMBL about each monitored Lidarr artist, prints the
    albums to add/update with their submission links, and, when anything was
    found, writes ``missing_albums.json`` and the HTML report.

    Exits with status 1 when required configuration is missing or
    MAX_ARTISTS is not an integer.
    """
    # Configuration - loaded from .env file or environment variables
    LIDARR_URL = os.getenv("LIDARR_URL")
    LIDARR_API_KEY = os.getenv("LIDARR_API_KEY")
    SAMBL_URL = os.getenv("SAMBL_URL") or None  # Set if Sambl has a web API
    # Limit number of artists to process (default: 5, also used when the
    # variable is set but empty); a value <= 0 disables the limit.
    raw_max_artists = os.getenv("MAX_ARTISTS") or "5"
    try:
        MAX_ARTISTS = int(raw_max_artists)
    except ValueError:
        print(f"Error: MAX_ARTISTS must be an integer, got {raw_max_artists!r}.", file=sys.stderr)
        sys.exit(1)

    # Validate required configuration
    if not LIDARR_URL:
        print("Error: LIDARR_URL not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)
    if not LIDARR_API_KEY:
        print("Error: LIDARR_API_KEY not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    # Initialize clients
    lidarr = LidarrClient(LIDARR_URL, LIDARR_API_KEY)
    sambl = SamblClient(SAMBL_URL)

    print("Fetching monitored artists from Lidarr...")
    artists = lidarr.get_monitored_artists(['new', 'all'])
    if not artists:
        print("No artists found with monitorNewItems set to 'new' or 'all'")
        return

    total_artists = len(artists)
    # Limit the number of artists for testing
    limited = MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS
    if limited:
        print(f"Found {total_artists} monitored artists (limiting to {MAX_ARTISTS} for testing)")
        artists = artists[:MAX_ARTISTS]
    else:
        print(f"Found {total_artists} monitored artists")
    print("\n" + "="*80)

    all_albums_to_add = []
    all_albums_to_update = []
    for artist in artists:
        artist_name = artist.get('artistName', 'Unknown')
        artist_mbid = artist.get('foreignArtistId') or artist.get('mbid')
        if not artist_mbid:
            print(f"\n⚠️ Skipping {artist_name} - no MusicBrainz ID found")
            continue

        print(f"\n🎵 Artist: {artist_name}")
        print(f" MusicBrainz ID: {artist_mbid}")

        # Find albums using Sambl
        albums_to_add, albums_to_update = sambl.find_missing_albums(artist_mbid, artist_name)

        # Process albums to add
        if albums_to_add:
            print(f"\n 📥 Albums to ADD ({len(albums_to_add)}):")
            for album in albums_to_add:
                deezer_url = album.get('deezer_url')
                if not deezer_url:
                    continue
                links = SubmissionLinkGenerator.generate_links(deezer_url)
                album['submission_links'] = links
                album['action'] = 'add'
                all_albums_to_add.append(album)
                print(f" 📀 {album.get('title', 'Unknown Title')}")
                print(f" Deezer: {deezer_url}")
                print(f" a-tisket: {links['atisket_link']}")
                print(f" Harmony: {links['harmony_link']}")

        # Process albums to update
        if albums_to_update:
            print(f"\n 🔄 Albums to UPDATE ({len(albums_to_update)}):")
            for album in albums_to_update:
                deezer_url = album.get('deezer_url')
                if not deezer_url:
                    continue
                mb_url = album.get('mb_url', '')
                issues = album.get('album_issues', [])
                links = SubmissionLinkGenerator.generate_links(deezer_url)
                album['submission_links'] = links
                album['action'] = 'update'
                all_albums_to_update.append(album)
                print(f" 📀 {album.get('title', 'Unknown Title')}")
                print(f" Deezer: {deezer_url}")
                if mb_url:
                    print(f" MusicBrainz: {mb_url}")
                if issues:
                    # Issues come straight from the API response; stringify so
                    # a non-string entry cannot break the join.
                    print(f" Issues: {', '.join(str(issue) for issue in issues)}")
                print(f" a-tisket: {links['atisket_link']}")
                print(f" Harmony: {links['harmony_link']}")

        if not albums_to_add and not albums_to_update:
            print(" ✓ All albums are properly linked!")

    # Generate summary report
    print("\n" + "="*80)
    print("\n📊 Summary:")
    print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if limited else ""))
    print(f" Albums to ADD: {len(all_albums_to_add)}")
    print(f" Albums to UPDATE: {len(all_albums_to_update)}")

    # Save results to JSON file
    all_albums = all_albums_to_add + all_albums_to_update
    if all_albums:
        output_file = "missing_albums.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump({
                'albums_to_add': all_albums_to_add,
                'albums_to_update': all_albums_to_update,
                'summary': {
                    'total_to_add': len(all_albums_to_add),
                    'total_to_update': len(all_albums_to_update),
                    'total': len(all_albums)
                }
            }, f, indent=2, ensure_ascii=False)
        print(f"\n💾 Results saved to {output_file}")
        # Generate HTML report with clickable links
        generate_html_report(all_albums_to_add, all_albums_to_update)
    else:
        print("\n✨ All albums are already on MusicBrainz!")
def generate_html_report(albums_to_add: List[Dict], albums_to_update: List[Dict]):
    """Generate an HTML report with clickable submission links.

    Writes ``missing_albums.html`` to the current working directory. All
    album-derived text and URLs are HTML-escaped before being placed into the
    page, because they originate from external API responses.

    Args:
        albums_to_add: Album dicts rendered in the "ADD" section. Each is read
            for 'title', 'artist_name' and 'submission_links'.
        albums_to_update: Album dicts rendered in the "UPDATE" section; these
            are additionally read for 'mb_url' and 'album_issues'.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # Literal CSS braces are doubled because the template goes through str.format().
    html_content = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>MusicBrainz Albums - Add & Update</title>
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background-color: #f5f5f5;
}}
h1 {{
color: #333;
border-bottom: 3px solid #4CAF50;
padding-bottom: 10px;
}}
h2 {{
color: #2196F3;
margin-top: 30px;
border-bottom: 2px solid #2196F3;
padding-bottom: 5px;
}}
.album {{
background: white;
border-radius: 8px;
padding: 20px;
margin: 20px 0;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}}
.album-title {{
font-size: 1.5em;
font-weight: bold;
color: #2196F3;
margin-bottom: 10px;
}}
.artist-name {{
color: #666;
margin-bottom: 15px;
}}
.links {{
display: flex;
gap: 10px;
flex-wrap: wrap;
}}
.link-button {{
display: inline-block;
padding: 10px 20px;
background-color: #4CAF50;
color: white;
text-decoration: none;
border-radius: 5px;
transition: background-color 0.3s;
}}
.link-button:hover {{
background-color: #45a049;
}}
.link-button.atisket {{
background-color: #2196F3;
}}
.link-button.atisket:hover {{
background-color: #0b7dda;
}}
.link-button.harmony {{
background-color: #FF9800;
}}
.link-button.harmony:hover {{
background-color: #e68900;
}}
.deezer-link {{
color: #666;
font-size: 0.9em;
margin-top: 10px;
}}
.mb-link {{
color: #666;
font-size: 0.9em;
margin-top: 5px;
}}
.issues {{
color: #FF9800;
font-size: 0.9em;
margin-top: 5px;
font-style: italic;
}}
.summary {{
background: white;
padding: 15px;
border-radius: 8px;
margin-bottom: 20px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}}
</style>
</head>
<body>
<h1>🎵 MusicBrainz Albums - Add & Update</h1>
<div class="summary">
<strong>Albums to ADD: {add_count}</strong> | <strong>Albums to UPDATE: {update_count}</strong>
</div>
"""
    album_html = """
<div class="album">
<div class="album-title">{title}</div>
<div class="artist-name">by {artist}</div>
{mb_info}
{issues_info}
<div class="links">
<a href="{atisket_link}" target="_blank" class="link-button atisket">Submit via a-tisket</a>
<a href="{harmony_link}" target="_blank" class="link-button harmony">Submit via Harmony</a>
</div>
<div class="deezer-link">
<a href="{deezer_url}" target="_blank">View on Deezer</a>
</div>
</div>
"""

    def safe(value) -> str:
        # escape() also encodes quotes by default, so the result is safe in
        # both element text and double-quoted attribute values.
        return escape(str(value))

    def format_album(album, is_update=False):
        submission_links = album.get('submission_links', {})
        mb_info = ""
        issues_info = ""
        if is_update:
            mb_url = album.get('mb_url', '')
            if mb_url:
                mb_info = f'<div class="mb-link"><a href="{safe(mb_url)}" target="_blank">View on MusicBrainz</a></div>'
            issues = album.get('album_issues', [])
            if issues:
                # Stringify each entry so a non-string issue cannot break the join.
                issues_text = ", ".join(str(issue) for issue in issues)
                issues_info = f'<div class="issues">Issues: {safe(issues_text)}</div>'
        return album_html.format(
            title=safe(album.get('title', 'Unknown Title')),
            artist=safe(album.get('artist_name', 'Unknown Artist')),
            mb_info=mb_info,
            issues_info=issues_info,
            atisket_link=safe(submission_links.get('atisket_link', '#')),
            harmony_link=safe(submission_links.get('harmony_link', '#')),
            deezer_url=safe(submission_links.get('deezer_url', '#'))
        )

    sections = []
    # Albums to ADD section
    if albums_to_add:
        sections.append('<h2>📥 Albums to ADD (Not in MusicBrainz)</h2>')
        sections.extend(format_album(album, is_update=False) for album in albums_to_add)
    # Albums to UPDATE section
    if albums_to_update:
        sections.append('<h2>🔄 Albums to UPDATE (Need Linking/Updates)</h2>')
        sections.extend(format_album(album, is_update=True) for album in albums_to_update)
    albums_html = "".join(sections)

    html_content = html_content.format(
        add_count=len(albums_to_add),
        update_count=len(albums_to_update)
    ) + albums_html + """
</body>
</html>
"""
    output_file = "missing_albums.html"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"📄 HTML report saved to {output_file}")
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()