Refactor find_missing_albums method to return a tuple and enhance album processing

- Changed find_missing_albums to return a tuple of two lists: (albums_to_add, albums_to_update).
- Improved handling of API responses and added debugging output for them.
- Enhanced album categorization based on status indicators (red for missing, orange for needing updates).
- Updated main function to process and display albums to add and update separately.
- Modified HTML report generation to reflect changes in album categorization.
This commit is contained in:
Danilo Reyes
2025-11-11 09:51:29 -06:00
parent 51df3f15db
commit 0dca7474a9

344
main.py
View File

@@ -13,7 +13,7 @@ import requests
import json import json
import sys import sys
import os import os
from typing import List, Dict, Optional from typing import List, Dict, Optional, Tuple
from urllib.parse import quote from urllib.parse import quote
from dotenv import load_dotenv from dotenv import load_dotenv
@@ -105,7 +105,7 @@ class SamblClient:
print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr) print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
return None return None
def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]: def find_missing_albums(self, artist_mbid: str, artist_name: str) -> Tuple[List[Dict], List[Dict]]:
""" """
Find albums missing on MusicBrainz from Deezer releases for an artist. Find albums missing on MusicBrainz from Deezer releases for an artist.
@@ -117,7 +117,9 @@ class SamblClient:
artist_name: Name of the artist artist_name: Name of the artist
Returns: Returns:
List of album dictionaries with Deezer URLs and metadata Tuple of (albums_to_add, albums_to_update)
- albums_to_add: Albums not in MusicBrainz (red status, no mbid)
- albums_to_update: Albums in MusicBrainz but need linking/updates (orange status)
Format: Format:
[ [
{ {
@@ -125,7 +127,9 @@ class SamblClient:
'deezer_url': 'https://www.deezer.com/album/123456789', 'deezer_url': 'https://www.deezer.com/album/123456789',
'deezer_id': '123456789', 'deezer_id': '123456789',
'release_date': '2024-01-01', 'release_date': '2024-01-01',
'artist_name': artist_name 'artist_name': artist_name,
'mbid': 'musicbrainz-id' (only for albums_to_update),
'album_issues': ['issue1', 'issue2'] (only for albums_to_update)
} }
] ]
""" """
@@ -151,69 +155,123 @@ class SamblClient:
data = response.json() data = response.json()
# Parse the response to extract missing albums # Debug: Print the raw response structure
# The response structure may vary, so we'll handle different formats print(f" [Sambl] Raw API response structure:")
missing_albums = [] print(f" [Sambl] Response type: {type(data)}")
if isinstance(data, dict):
print(f" [Sambl] Top-level keys: {list(data.keys())}")
if 'albumData' in data:
album_data = data.get('albumData', [])
print(f" [Sambl] albumData count: {len(album_data)}")
if len(album_data) > 0:
print(f" [Sambl] First album keys: {list(album_data[0].keys()) if isinstance(album_data[0], dict) else 'Not a dict'}")
print(f" [Sambl] First album sample: {json.dumps(album_data[0], indent=2)[:500] if isinstance(album_data[0], dict) else str(album_data[0])[:500]}")
# Check status counts
if 'orange' in data:
print(f" [Sambl] Orange (missing) albums: {data.get('orange', 0)}")
if 'green' in data:
print(f" [Sambl] Green (linked) albums: {data.get('green', 0)}")
if 'red' in data:
print(f" [Sambl] Red albums: {data.get('red', 0)}")
elif isinstance(data, list):
print(f" [Sambl] Response is a list with {len(data)} items")
if len(data) > 0:
print(f" [Sambl] First item keys: {list(data[0].keys()) if isinstance(data[0], dict) else 'Not a dict'}")
print(f" [Sambl] First item sample: {json.dumps(data[0], indent=2)[:500] if isinstance(data[0], dict) else str(data[0])[:500]}")
# Parse the response to extract albums
# SAMBL returns albums in 'albumData' with status indicators:
# - 'red': Not in MusicBrainz (need to add)
# - 'orange': In MusicBrainz but needs linking/updates (need to update)
# - 'green': Properly linked (skip)
albums_to_add = []
albums_to_update = []
# SAMBL typically returns albums with status indicators
# Missing albums are usually marked as not found in MusicBrainz
albums = [] albums = []
if isinstance(data, dict): if isinstance(data, dict):
# Check for common response structures # SAMBL uses 'albumData' as the key for the albums array
albums = data.get('albums', []) album_data = data.get('albumData')
print(f" [Sambl] albumData type: {type(album_data)}, value: {album_data}")
if isinstance(album_data, list):
albums = album_data
elif isinstance(album_data, dict):
# albumData might be a dict with nested structure
print(f" [Sambl] albumData is dict with keys: {list(album_data.keys()) if album_data else 'None'}")
albums = album_data.get('albums', album_data.get('data', []))
# Fallback to other possible keys
if not albums and isinstance(data.get('albums'), list):
albums = data.get('albums', [])
if not albums and isinstance(data.get('data'), list): if not albums and isinstance(data.get('data'), list):
albums = data.get('data', []) albums = data.get('data', [])
elif isinstance(data, list): elif isinstance(data, list):
albums = data albums = data
for album in albums: print(f" [Sambl] Processing {len(albums)} album(s) from response")
# Look for albums that are missing from MusicBrainz
# SAMBL typically marks these with status like 'missing', 'not_found', etc.
status = str(album.get('status', '')).lower()
mb_status = str(album.get('musicbrainz_status', '')).lower()
# Check if album is missing (not linked to MusicBrainz) # If we have status counts but no albums, something is wrong
# SAMBL marks missing albums with various indicators if isinstance(data, dict) and len(albums) == 0:
is_missing = ( print(f" [Sambl] ⚠️ Warning: Found status counts but no albums in albumData")
'missing' in status or print(f" [Sambl] Full response keys: {list(data.keys())}")
'not_found' in status or print(f" [Sambl] Total albums reported: {data.get('total', 'N/A')}")
'not_linked' in status or # Try to print a sample of the response structure
'orange' in status or # SAMBL uses orange status for albums not linked print(f" [Sambl] Response sample: {json.dumps(data, indent=2)[:1000]}")
album.get('musicbrainz_id') is None or
album.get('musicbrainz_id') == '' or
album.get('mbid') is None or
album.get('mbid') == ''
)
if is_missing: for idx, album in enumerate(albums):
# Extract Deezer URL and album info # Get album status and MusicBrainz ID
deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '') album_status = str(album.get('albumStatus', '')).lower()
if deezer_id and deezer_id != 'None': musicbrainz_id = album.get('mbid') or album.get('musicbrainz_id') or album.get('musicbrainzId') or ''
deezer_url = f"https://www.deezer.com/album/{deezer_id}" album_title = album.get('name') or album.get('title') or 'Unknown'
album_issues = album.get('albumIssues', [])
missing_albums.append({ # Debug: Print album details
'title': album.get('title') or album.get('name') or 'Unknown Title', print(f" [Sambl] Album {idx+1}: {album_title}")
'deezer_url': deezer_url, print(f" Status: {album_status or 'N/A'}, MBID: {musicbrainz_id or 'None'}, Issues: {album_issues}")
'deezer_id': deezer_id,
'release_date': album.get('release_date') or album.get('releaseDate') or album.get('release') or '',
'artist_name': artist_name,
'cover_url': album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or album.get('cover_medium') or ''
})
if missing_albums: # Extract Deezer URL and album info
print(f" [Sambl] ✓ Found {len(missing_albums)} missing album(s)") deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
else: if not deezer_id or deezer_id == 'None':
print(f" [Sambl] ✓ No missing albums found") print(f" ⚠️ Skipping - no valid Deezer ID found")
continue
return missing_albums deezer_url = f"https://www.deezer.com/album/{deezer_id}"
album_data = {
'title': album_title,
'deezer_url': deezer_url,
'deezer_id': deezer_id,
'release_date': album.get('releaseDate') or album.get('release_date') or album.get('release') or '',
'artist_name': artist_name,
'cover_url': album.get('imageUrl') or album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or ''
}
# Categorize albums based on status
if album_status == 'red' or not musicbrainz_id or musicbrainz_id == '':
# Red status or no MBID = needs to be added to MusicBrainz
albums_to_add.append(album_data)
print(f" ✓ Added to 'to add' list (not in MusicBrainz)")
elif album_status == 'orange':
# Orange status = in MusicBrainz but needs linking/updates
album_data['mbid'] = musicbrainz_id
album_data['mb_url'] = album.get('albumMBUrl', f'https://musicbrainz.org/release/{musicbrainz_id}')
album_data['album_issues'] = album_issues
albums_to_update.append(album_data)
print(f" ✓ Added to 'to update' list (needs linking/updates)")
else:
# Green status = properly linked, skip
print(f" ✓ Album is properly linked (MBID: {musicbrainz_id})")
print(f" [Sambl] ✓ Found {len(albums_to_add)} album(s) to add, {len(albums_to_update)} album(s) to update")
return albums_to_add, albums_to_update
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException as e:
print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr) print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
return [] return [], []
except (KeyError, ValueError, TypeError) as e: except (KeyError, ValueError, TypeError) as e:
print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr) print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
print(f" [Sambl] Response: {response.text[:200] if 'response' in locals() else 'N/A'}", file=sys.stderr) print(f" [Sambl] Response: {response.text[:200] if 'response' in locals() else 'N/A'}", file=sys.stderr)
return [] return [], []
class SubmissionLinkGenerator: class SubmissionLinkGenerator:
@@ -279,7 +337,8 @@ def main():
print(f"Found {total_artists} monitored artists") print(f"Found {total_artists} monitored artists")
print("\n" + "="*80) print("\n" + "="*80)
all_missing_albums = [] all_albums_to_add = []
all_albums_to_update = []
for artist in artists: for artist in artists:
artist_name = artist.get('artistName', 'Unknown') artist_name = artist.get('artistName', 'Unknown')
@@ -292,88 +351,129 @@ def main():
print(f"\n🎵 Artist: {artist_name}") print(f"\n🎵 Artist: {artist_name}")
print(f" MusicBrainz ID: {artist_mbid}") print(f" MusicBrainz ID: {artist_mbid}")
# Find missing albums using Sambl # Find albums using Sambl
missing_albums = sambl.find_missing_albums(artist_mbid, artist_name) albums_to_add, albums_to_update = sambl.find_missing_albums(artist_mbid, artist_name)
if missing_albums: # Process albums to add
print(f" Found {len(missing_albums)} missing album(s):") if albums_to_add:
for album in missing_albums: print(f"\n 📥 Albums to ADD ({len(albums_to_add)}):")
for album in albums_to_add:
deezer_url = album.get('deezer_url') deezer_url = album.get('deezer_url')
if deezer_url: if deezer_url:
links = SubmissionLinkGenerator.generate_links(deezer_url) links = SubmissionLinkGenerator.generate_links(deezer_url)
album['submission_links'] = links album['submission_links'] = links
all_missing_albums.append(album) album['action'] = 'add'
all_albums_to_add.append(album)
print(f" 📀 {album.get('title', 'Unknown Title')}") print(f" 📀 {album.get('title', 'Unknown Title')}")
print(f" Deezer: {deezer_url}") print(f" Deezer: {deezer_url}")
print(f" a-tisket: {links['atisket_link']}") print(f" a-tisket: {links['atisket_link']}")
print(f" Harmony: {links['harmony_link']}") print(f" Harmony: {links['harmony_link']}")
else:
print(f" ✓ No missing albums found") # Process albums to update
if albums_to_update:
print(f"\n 🔄 Albums to UPDATE ({len(albums_to_update)}):")
for album in albums_to_update:
deezer_url = album.get('deezer_url')
mb_url = album.get('mb_url', '')
issues = album.get('album_issues', [])
if deezer_url:
links = SubmissionLinkGenerator.generate_links(deezer_url)
album['submission_links'] = links
album['action'] = 'update'
all_albums_to_update.append(album)
print(f" 📀 {album.get('title', 'Unknown Title')}")
print(f" Deezer: {deezer_url}")
if mb_url:
print(f" MusicBrainz: {mb_url}")
if issues:
print(f" Issues: {', '.join(issues)}")
print(f" a-tisket: {links['atisket_link']}")
print(f" Harmony: {links['harmony_link']}")
if not albums_to_add and not albums_to_update:
print(f" ✓ All albums are properly linked!")
# Generate summary report # Generate summary report
print("\n" + "="*80) print("\n" + "="*80)
print(f"\n📊 Summary:") print(f"\n📊 Summary:")
print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS else "")) print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS else ""))
print(f" Total missing albums found: {len(all_missing_albums)}") print(f" Albums to ADD: {len(all_albums_to_add)}")
print(f" Albums to UPDATE: {len(all_albums_to_update)}")
# Save results to JSON file # Save results to JSON file
if all_missing_albums: all_albums = all_albums_to_add + all_albums_to_update
if all_albums:
output_file = "missing_albums.json" output_file = "missing_albums.json"
with open(output_file, 'w', encoding='utf-8') as f: with open(output_file, 'w', encoding='utf-8') as f:
json.dump(all_missing_albums, f, indent=2, ensure_ascii=False) json.dump({
'albums_to_add': all_albums_to_add,
'albums_to_update': all_albums_to_update,
'summary': {
'total_to_add': len(all_albums_to_add),
'total_to_update': len(all_albums_to_update),
'total': len(all_albums)
}
}, f, indent=2, ensure_ascii=False)
print(f"\n💾 Results saved to {output_file}") print(f"\n💾 Results saved to {output_file}")
# Generate HTML report with clickable links # Generate HTML report with clickable links
generate_html_report(all_missing_albums) generate_html_report(all_albums_to_add, all_albums_to_update)
else: else:
print("\n✨ All albums are already on MusicBrainz!") print("\n✨ All albums are already on MusicBrainz!")
def generate_html_report(albums: List[Dict]): def generate_html_report(albums_to_add: List[Dict], albums_to_update: List[Dict]):
"""Generate an HTML report with clickable submission links""" """Generate an HTML report with clickable submission links"""
html_content = """<!DOCTYPE html> html_content = """<!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Missing Albums - MusicBrainz Submission Links</title> <title>MusicBrainz Albums - Add & Update</title>
<style> <style>
body { body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
max-width: 1200px; max-width: 1200px;
margin: 0 auto; margin: 0 auto;
padding: 20px; padding: 20px;
background-color: #f5f5f5; background-color: #f5f5f5;
} }}
h1 { h1 {{
color: #333; color: #333;
border-bottom: 3px solid #4CAF50; border-bottom: 3px solid #4CAF50;
padding-bottom: 10px; padding-bottom: 10px;
} }}
.album { h2 {{
color: #2196F3;
margin-top: 30px;
border-bottom: 2px solid #2196F3;
padding-bottom: 5px;
}}
.album {{
background: white; background: white;
border-radius: 8px; border-radius: 8px;
padding: 20px; padding: 20px;
margin: 20px 0; margin: 20px 0;
box-shadow: 0 2px 4px rgba(0,0,0,0.1); box-shadow: 0 2px 4px rgba(0,0,0,0.1);
} }}
.album-title { .album-title {{
font-size: 1.5em; font-size: 1.5em;
font-weight: bold; font-weight: bold;
color: #2196F3; color: #2196F3;
margin-bottom: 10px; margin-bottom: 10px;
} }}
.artist-name { .artist-name {{
color: #666; color: #666;
margin-bottom: 15px; margin-bottom: 15px;
} }}
.links { .links {{
display: flex; display: flex;
gap: 10px; gap: 10px;
flex-wrap: wrap; flex-wrap: wrap;
} }}
.link-button { .link-button {{
display: inline-block; display: inline-block;
padding: 10px 20px; padding: 10px 20px;
background-color: #4CAF50; background-color: #4CAF50;
@@ -381,40 +481,51 @@ def generate_html_report(albums: List[Dict]):
text-decoration: none; text-decoration: none;
border-radius: 5px; border-radius: 5px;
transition: background-color 0.3s; transition: background-color 0.3s;
} }}
.link-button:hover { .link-button:hover {{
background-color: #45a049; background-color: #45a049;
} }}
.link-button.atisket { .link-button.atisket {{
background-color: #2196F3; background-color: #2196F3;
} }}
.link-button.atisket:hover { .link-button.atisket:hover {{
background-color: #0b7dda; background-color: #0b7dda;
} }}
.link-button.harmony { .link-button.harmony {{
background-color: #FF9800; background-color: #FF9800;
} }}
.link-button.harmony:hover { .link-button.harmony:hover {{
background-color: #e68900; background-color: #e68900;
} }}
.deezer-link { .deezer-link {{
color: #666; color: #666;
font-size: 0.9em; font-size: 0.9em;
margin-top: 10px; margin-top: 10px;
} }}
.summary { .mb-link {{
color: #666;
font-size: 0.9em;
margin-top: 5px;
}}
.issues {{
color: #FF9800;
font-size: 0.9em;
margin-top: 5px;
font-style: italic;
}}
.summary {{
background: white; background: white;
padding: 15px; padding: 15px;
border-radius: 8px; border-radius: 8px;
margin-bottom: 20px; margin-bottom: 20px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1); box-shadow: 0 2px 4px rgba(0,0,0,0.1);
} }}
</style> </style>
</head> </head>
<body> <body>
<h1>🎵 Missing Albums - MusicBrainz Submission Links</h1> <h1>🎵 MusicBrainz Albums - Add & Update</h1>
<div class="summary"> <div class="summary">
<strong>Total missing albums: {count}</strong> <strong>Albums to ADD: {add_count}</strong> | <strong>Albums to UPDATE: {update_count}</strong>
</div> </div>
""" """
@@ -422,6 +533,8 @@ def generate_html_report(albums: List[Dict]):
<div class="album"> <div class="album">
<div class="album-title">{title}</div> <div class="album-title">{title}</div>
<div class="artist-name">by {artist}</div> <div class="artist-name">by {artist}</div>
{mb_info}
{issues_info}
<div class="links"> <div class="links">
<a href="{atisket_link}" target="_blank" class="link-button atisket">Submit via a-tisket</a> <a href="{atisket_link}" target="_blank" class="link-button atisket">Submit via a-tisket</a>
<a href="{harmony_link}" target="_blank" class="link-button harmony">Submit via Harmony</a> <a href="{harmony_link}" target="_blank" class="link-button harmony">Submit via Harmony</a>
@@ -432,18 +545,47 @@ def generate_html_report(albums: List[Dict]):
</div> </div>
""" """
albums_html = "" def format_album(album, is_update=False):
for album in albums:
submission_links = album.get('submission_links', {}) submission_links = album.get('submission_links', {})
albums_html += album_html.format( mb_info = ""
issues_info = ""
if is_update:
mb_url = album.get('mb_url', '')
if mb_url:
mb_info = f'<div class="mb-link"><a href="{mb_url}" target="_blank">View on MusicBrainz</a></div>'
issues = album.get('album_issues', [])
if issues:
issues_info = f'<div class="issues">Issues: {", ".join(issues)}</div>'
return album_html.format(
title=album.get('title', 'Unknown Title'), title=album.get('title', 'Unknown Title'),
artist=album.get('artist_name', 'Unknown Artist'), artist=album.get('artist_name', 'Unknown Artist'),
mb_info=mb_info,
issues_info=issues_info,
atisket_link=submission_links.get('atisket_link', '#'), atisket_link=submission_links.get('atisket_link', '#'),
harmony_link=submission_links.get('harmony_link', '#'), harmony_link=submission_links.get('harmony_link', '#'),
deezer_url=submission_links.get('deezer_url', '#') deezer_url=submission_links.get('deezer_url', '#')
) )
html_content = html_content.format(count=len(albums)) + albums_html + """ albums_html = ""
# Albums to ADD section
if albums_to_add:
albums_html += '<h2>📥 Albums to ADD (Not in MusicBrainz)</h2>'
for album in albums_to_add:
albums_html += format_album(album, is_update=False)
# Albums to UPDATE section
if albums_to_update:
albums_html += '<h2>🔄 Albums to UPDATE (Need Linking/Updates)</h2>'
for album in albums_to_update:
albums_html += format_album(album, is_update=True)
html_content = html_content.format(
add_count=len(albums_to_add),
update_count=len(albums_to_update)
) + albums_html + """
</body> </body>
</html> </html>
""" """