diff --git a/main.py b/main.py index 64f67ad..13aade1 100755 --- a/main.py +++ b/main.py @@ -13,7 +13,7 @@ import requests import json import sys import os -from typing import List, Dict, Optional +from typing import List, Dict, Optional, Tuple from urllib.parse import quote from dotenv import load_dotenv @@ -105,7 +105,7 @@ class SamblClient: print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr) return None - def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]: + def find_missing_albums(self, artist_mbid: str, artist_name: str) -> Tuple[List[Dict], List[Dict]]: """ Find albums missing on MusicBrainz from Deezer releases for an artist. @@ -117,7 +117,9 @@ class SamblClient: artist_name: Name of the artist Returns: - List of album dictionaries with Deezer URLs and metadata + Tuple of (albums_to_add, albums_to_update) + - albums_to_add: Albums not in MusicBrainz (red status, no mbid) + - albums_to_update: Albums in MusicBrainz but need linking/updates (orange status) Format: [ { @@ -125,7 +127,9 @@ class SamblClient: 'deezer_url': 'https://www.deezer.com/album/123456789', 'deezer_id': '123456789', 'release_date': '2024-01-01', - 'artist_name': artist_name + 'artist_name': artist_name, + 'mbid': 'musicbrainz-id' (only for albums_to_update), + 'album_issues': ['issue1', 'issue2'] (only for albums_to_update) } ] """ @@ -151,69 +155,123 @@ class SamblClient: data = response.json() - # Parse the response to extract missing albums - # The response structure may vary, so we'll handle different formats - missing_albums = [] + # Debug: Print the raw response structure + print(f" [Sambl] Raw API response structure:") + print(f" [Sambl] Response type: {type(data)}") + if isinstance(data, dict): + print(f" [Sambl] Top-level keys: {list(data.keys())}") + if 'albumData' in data: + album_data = data.get('albumData', []) + print(f" [Sambl] albumData count: {len(album_data)}") + if len(album_data) > 0: + print(f" [Sambl] First album keys: {list(album_data[0].keys()) if isinstance(album_data[0], dict) else 'Not a dict'}") + print(f" [Sambl] First album sample: {json.dumps(album_data[0], indent=2)[:500] if isinstance(album_data[0], dict) else str(album_data[0])[:500]}") + # Check status counts + if 'orange' in data: + print(f" [Sambl] Orange (missing) albums: {data.get('orange', 0)}") + if 'green' in data: + print(f" [Sambl] Green (linked) albums: {data.get('green', 0)}") + if 'red' in data: + print(f" [Sambl] Red albums: {data.get('red', 0)}") + elif isinstance(data, list): + print(f" [Sambl] Response is a list with {len(data)} items") + if len(data) > 0: + print(f" [Sambl] First item keys: {list(data[0].keys()) if isinstance(data[0], dict) else 'Not a dict'}") + print(f" [Sambl] First item sample: {json.dumps(data[0], indent=2)[:500] if isinstance(data[0], dict) else str(data[0])[:500]}") + + # Parse the response to extract albums + # SAMBL returns albums in 'albumData' with status indicators: + # - 'red': Not in MusicBrainz (need to add) + # - 'orange': In MusicBrainz but needs linking/updates (need to update) + # - 'green': Properly linked (skip) + albums_to_add = [] + albums_to_update = [] - # SAMBL typically returns albums with status indicators - # Missing albums are usually marked as not found in MusicBrainz albums = [] if isinstance(data, dict): - # Check for common response structures - albums = data.get('albums', []) + # SAMBL uses 'albumData' as the key for the albums array + album_data = data.get('albumData') + print(f" [Sambl] albumData type: {type(album_data)}, value: {album_data}") + + if isinstance(album_data, list): + albums = album_data + elif isinstance(album_data, dict): + # albumData might be a dict with nested structure + print(f" [Sambl] albumData is dict with keys: {list(album_data.keys()) if album_data else 'None'}") + albums = album_data.get('albums', album_data.get('data', [])) + + # Fallback to other possible keys + if not albums and isinstance(data.get('albums'), list): + albums = data.get('albums', []) if not albums and isinstance(data.get('data'), list): albums = data.get('data', []) elif isinstance(data, list): albums = data - for album in albums: - # Look for albums that are missing from MusicBrainz - # SAMBL typically marks these with status like 'missing', 'not_found', etc. - status = str(album.get('status', '')).lower() - mb_status = str(album.get('musicbrainz_status', '')).lower() - - # Check if album is missing (not linked to MusicBrainz) - # SAMBL marks missing albums with various indicators - is_missing = ( - 'missing' in status or - 'not_found' in status or - 'not_linked' in status or - 'orange' in status or # SAMBL uses orange status for albums not linked - album.get('musicbrainz_id') is None or - album.get('musicbrainz_id') == '' or - album.get('mbid') is None or - album.get('mbid') == '' - ) - - if is_missing: - # Extract Deezer URL and album info - deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '') - if deezer_id and deezer_id != 'None': - deezer_url = f"https://www.deezer.com/album/{deezer_id}" - - missing_albums.append({ - 'title': album.get('title') or album.get('name') or 'Unknown Title', - 'deezer_url': deezer_url, - 'deezer_id': deezer_id, - 'release_date': album.get('release_date') or album.get('releaseDate') or album.get('release') or '', - 'artist_name': artist_name, - 'cover_url': album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or album.get('cover_medium') or '' - }) + print(f" [Sambl] Processing {len(albums)} album(s) from response") - if missing_albums: - print(f" [Sambl] ✓ Found {len(missing_albums)} missing album(s)") - else: - print(f" [Sambl] ✓ No missing albums found") + # If we have status counts but no albums, something is wrong + if isinstance(data, dict) and len(albums) == 0: + print(f" [Sambl] ⚠️ Warning: Found status counts but no albums in albumData") + print(f" [Sambl] Full response keys: {list(data.keys())}") + print(f" [Sambl] Total albums reported: {data.get('total', 'N/A')}") + # Try to print a sample of the response structure + print(f" [Sambl] Response sample: {json.dumps(data, indent=2)[:1000]}") - return missing_albums + for idx, album in enumerate(albums): + # Get album status and MusicBrainz ID + album_status = str(album.get('albumStatus', '')).lower() + musicbrainz_id = album.get('mbid') or album.get('musicbrainz_id') or album.get('musicbrainzId') or '' + album_title = album.get('name') or album.get('title') or 'Unknown' + album_issues = album.get('albumIssues', []) + + # Debug: Print album details + print(f" [Sambl] Album {idx+1}: {album_title}") + print(f" Status: {album_status or 'N/A'}, MBID: {musicbrainz_id or 'None'}, Issues: {album_issues}") + + # Extract Deezer URL and album info + deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '') + if not deezer_id or deezer_id == 'None': + print(f" ⚠️ Skipping - no valid Deezer ID found") + continue + + deezer_url = f"https://www.deezer.com/album/{deezer_id}" + album_data = { + 'title': album_title, + 'deezer_url': deezer_url, + 'deezer_id': deezer_id, + 'release_date': album.get('releaseDate') or album.get('release_date') or album.get('release') or '', + 'artist_name': artist_name, + 'cover_url': album.get('imageUrl') or album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or '' + } + + # Categorize albums based on status + if album_status == 'red' or not musicbrainz_id or musicbrainz_id == '': + # Red status or no MBID = needs to be added to MusicBrainz + albums_to_add.append(album_data) + print(f" ✓ Added to 'to add' list (not in MusicBrainz)") + elif album_status == 'orange': + # Orange status = in MusicBrainz but needs linking/updates + album_data['mbid'] = musicbrainz_id + album_data['mb_url'] = album.get('albumMBUrl', f'https://musicbrainz.org/release/{musicbrainz_id}') + album_data['album_issues'] = album_issues + albums_to_update.append(album_data) + print(f" ✓ Added to 'to update' list (needs linking/updates)") + else: + # Green status = properly linked, skip + print(f" ✓ Album is properly linked (MBID: {musicbrainz_id})") + + print(f" [Sambl] ✓ Found {len(albums_to_add)} album(s) to add, {len(albums_to_update)} album(s) to update") + + return albums_to_add, albums_to_update except requests.exceptions.RequestException as e: print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr) - return [] + return [], [] except (KeyError, ValueError, TypeError) as e: print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr) print(f" [Sambl] Response: {response.text[:200] if 'response' in locals() else 'N/A'}", file=sys.stderr) - return [] + return [], [] class SubmissionLinkGenerator: @@ -279,7 +337,8 @@ def main(): print(f"Found {total_artists} monitored artists") print("\n" + "="*80) - all_missing_albums = [] + all_albums_to_add = [] + all_albums_to_update = [] for artist in artists: artist_name = artist.get('artistName', 'Unknown') @@ -292,88 +351,129 @@ def main(): print(f"\n🎵 Artist: {artist_name}") print(f" MusicBrainz ID: {artist_mbid}") - # Find missing albums using Sambl - missing_albums = sambl.find_missing_albums(artist_mbid, artist_name) + # Find albums using Sambl + albums_to_add, albums_to_update = sambl.find_missing_albums(artist_mbid, artist_name) - if missing_albums: - print(f" Found {len(missing_albums)} missing album(s):") - for album in missing_albums: + # Process albums to add + if albums_to_add: + print(f"\n 📥 Albums to ADD ({len(albums_to_add)}):") + for album in albums_to_add: deezer_url = album.get('deezer_url') if deezer_url: links = SubmissionLinkGenerator.generate_links(deezer_url) album['submission_links'] = links - all_missing_albums.append(album) + album['action'] = 'add' + all_albums_to_add.append(album) - print(f" 📀 {album.get('title', 'Unknown Title')}") - print(f" Deezer: {deezer_url}") - print(f" a-tisket: {links['atisket_link']}") - print(f" Harmony: {links['harmony_link']}") - else: - print(f" ✓ No missing albums found") + print(f" 📀 {album.get('title', 'Unknown Title')}") + print(f" Deezer: {deezer_url}") + print(f" a-tisket: {links['atisket_link']}") + print(f" Harmony: {links['harmony_link']}") + + # Process albums to update + if albums_to_update: + print(f"\n 🔄 Albums to UPDATE ({len(albums_to_update)}):") + for album in albums_to_update: + deezer_url = album.get('deezer_url') + mb_url = album.get('mb_url', '') + issues = album.get('album_issues', []) + if deezer_url: + links = SubmissionLinkGenerator.generate_links(deezer_url) + album['submission_links'] = links + album['action'] = 'update' + all_albums_to_update.append(album) + + print(f" 📀 {album.get('title', 'Unknown Title')}") + print(f" Deezer: {deezer_url}") + if mb_url: + print(f" MusicBrainz: {mb_url}") + if issues: + print(f" Issues: {', '.join(issues)}") + print(f" a-tisket: {links['atisket_link']}") + print(f" Harmony: {links['harmony_link']}") + + if not albums_to_add and not albums_to_update: + print(f" ✓ All albums are properly linked!") # Generate summary report print("\n" + "="*80) print(f"\n📊 Summary:") print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS else "")) - print(f" Total missing albums found: {len(all_missing_albums)}") + print(f" Albums to ADD: {len(all_albums_to_add)}") + print(f" Albums to UPDATE: {len(all_albums_to_update)}") # Save results to JSON file - if all_missing_albums: + all_albums = all_albums_to_add + all_albums_to_update + if all_albums: output_file = "missing_albums.json" with open(output_file, 'w', encoding='utf-8') as f: - json.dump(all_missing_albums, f, indent=2, ensure_ascii=False) + json.dump({ + 'albums_to_add': all_albums_to_add, + 'albums_to_update': all_albums_to_update, + 'summary': { + 'total_to_add': len(all_albums_to_add), + 'total_to_update': len(all_albums_to_update), + 'total': len(all_albums) + } + }, f, indent=2, ensure_ascii=False) print(f"\n💾 Results saved to {output_file}") # Generate HTML report with clickable links - generate_html_report(all_missing_albums) + generate_html_report(all_albums_to_add, all_albums_to_update) else: print("\n✨ All albums are already on MusicBrainz!") -def generate_html_report(albums: List[Dict]): +def generate_html_report(albums_to_add: List[Dict], albums_to_update: List[Dict]): """Generate an HTML report with clickable submission links""" html_content = """
-