Refactor find_missing_albums method to return tuples and enhance album processing

- Changed find_missing_albums to return a tuple of two lists, (albums_to_add, albums_to_update), instead of a single list of missing albums; error paths now return ([], []).
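- Improved response handling: albums are now read from the 'albumData' key (list or nested dict), falling back to 'albums' and 'data', and debug output describing the raw API response structure is printed.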
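- Enhanced album categorization based on the SAMBL status indicator: red (or no MusicBrainz ID) means the album is missing and must be added, orange means it is in MusicBrainz but needs linking/updates, and green (properly linked) is skipped.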
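- Updated main function to process and display albums to add and albums to update separately, and to save them to missing_albums.json under separate 'albums_to_add' and 'albums_to_update' keys together with a summary of counts.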
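- Modified HTML report generation to reflect the new categorization: the report now has separate "Albums to ADD" and "Albums to UPDATE" sections, and albums to update also show a MusicBrainz link and their list of issues.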
This commit is contained in:
Danilo Reyes
2025-11-11 09:51:29 -06:00
parent 51df3f15db
commit 0dca7474a9

350
main.py
View File

@@ -13,7 +13,7 @@ import requests
import json
import sys
import os
from typing import List, Dict, Optional
from typing import List, Dict, Optional, Tuple
from urllib.parse import quote
from dotenv import load_dotenv
@@ -105,7 +105,7 @@ class SamblClient:
print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
return None
def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]:
def find_missing_albums(self, artist_mbid: str, artist_name: str) -> Tuple[List[Dict], List[Dict]]:
"""
Find albums missing on MusicBrainz from Deezer releases for an artist.
@@ -117,7 +117,9 @@ class SamblClient:
artist_name: Name of the artist
Returns:
List of album dictionaries with Deezer URLs and metadata
Tuple of (albums_to_add, albums_to_update)
- albums_to_add: Albums not in MusicBrainz (red status, no mbid)
- albums_to_update: Albums in MusicBrainz but need linking/updates (orange status)
Format:
[
{
@@ -125,7 +127,9 @@ class SamblClient:
'deezer_url': 'https://www.deezer.com/album/123456789',
'deezer_id': '123456789',
'release_date': '2024-01-01',
'artist_name': artist_name
'artist_name': artist_name,
'mbid': 'musicbrainz-id' (only for albums_to_update),
'album_issues': ['issue1', 'issue2'] (only for albums_to_update)
}
]
"""
@@ -151,69 +155,123 @@ class SamblClient:
data = response.json()
# Parse the response to extract missing albums
# The response structure may vary, so we'll handle different formats
missing_albums = []
# Debug: Print the raw response structure
print(f" [Sambl] Raw API response structure:")
print(f" [Sambl] Response type: {type(data)}")
if isinstance(data, dict):
print(f" [Sambl] Top-level keys: {list(data.keys())}")
if 'albumData' in data:
album_data = data.get('albumData', [])
print(f" [Sambl] albumData count: {len(album_data)}")
if len(album_data) > 0:
print(f" [Sambl] First album keys: {list(album_data[0].keys()) if isinstance(album_data[0], dict) else 'Not a dict'}")
print(f" [Sambl] First album sample: {json.dumps(album_data[0], indent=2)[:500] if isinstance(album_data[0], dict) else str(album_data[0])[:500]}")
# Check status counts
if 'orange' in data:
print(f" [Sambl] Orange (missing) albums: {data.get('orange', 0)}")
if 'green' in data:
print(f" [Sambl] Green (linked) albums: {data.get('green', 0)}")
if 'red' in data:
print(f" [Sambl] Red albums: {data.get('red', 0)}")
elif isinstance(data, list):
print(f" [Sambl] Response is a list with {len(data)} items")
if len(data) > 0:
print(f" [Sambl] First item keys: {list(data[0].keys()) if isinstance(data[0], dict) else 'Not a dict'}")
print(f" [Sambl] First item sample: {json.dumps(data[0], indent=2)[:500] if isinstance(data[0], dict) else str(data[0])[:500]}")
# Parse the response to extract albums
# SAMBL returns albums in 'albumData' with status indicators:
# - 'red': Not in MusicBrainz (need to add)
# - 'orange': In MusicBrainz but needs linking/updates (need to update)
# - 'green': Properly linked (skip)
albums_to_add = []
albums_to_update = []
# SAMBL typically returns albums with status indicators
# Missing albums are usually marked as not found in MusicBrainz
albums = []
if isinstance(data, dict):
# Check for common response structures
albums = data.get('albums', [])
# SAMBL uses 'albumData' as the key for the albums array
album_data = data.get('albumData')
print(f" [Sambl] albumData type: {type(album_data)}, value: {album_data}")
if isinstance(album_data, list):
albums = album_data
elif isinstance(album_data, dict):
# albumData might be a dict with nested structure
print(f" [Sambl] albumData is dict with keys: {list(album_data.keys()) if album_data else 'None'}")
albums = album_data.get('albums', album_data.get('data', []))
# Fallback to other possible keys
if not albums and isinstance(data.get('albums'), list):
albums = data.get('albums', [])
if not albums and isinstance(data.get('data'), list):
albums = data.get('data', [])
elif isinstance(data, list):
albums = data
for album in albums:
# Look for albums that are missing from MusicBrainz
# SAMBL typically marks these with status like 'missing', 'not_found', etc.
status = str(album.get('status', '')).lower()
mb_status = str(album.get('musicbrainz_status', '')).lower()
# Check if album is missing (not linked to MusicBrainz)
# SAMBL marks missing albums with various indicators
is_missing = (
'missing' in status or
'not_found' in status or
'not_linked' in status or
'orange' in status or # SAMBL uses orange status for albums not linked
album.get('musicbrainz_id') is None or
album.get('musicbrainz_id') == '' or
album.get('mbid') is None or
album.get('mbid') == ''
)
if is_missing:
# Extract Deezer URL and album info
deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
if deezer_id and deezer_id != 'None':
deezer_url = f"https://www.deezer.com/album/{deezer_id}"
missing_albums.append({
'title': album.get('title') or album.get('name') or 'Unknown Title',
'deezer_url': deezer_url,
'deezer_id': deezer_id,
'release_date': album.get('release_date') or album.get('releaseDate') or album.get('release') or '',
'artist_name': artist_name,
'cover_url': album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or album.get('cover_medium') or ''
})
print(f" [Sambl] Processing {len(albums)} album(s) from response")
if missing_albums:
print(f" [Sambl] ✓ Found {len(missing_albums)} missing album(s)")
else:
print(f" [Sambl] ✓ No missing albums found")
# If we have status counts but no albums, something is wrong
if isinstance(data, dict) and len(albums) == 0:
print(f" [Sambl] ⚠️ Warning: Found status counts but no albums in albumData")
print(f" [Sambl] Full response keys: {list(data.keys())}")
print(f" [Sambl] Total albums reported: {data.get('total', 'N/A')}")
# Try to print a sample of the response structure
print(f" [Sambl] Response sample: {json.dumps(data, indent=2)[:1000]}")
return missing_albums
for idx, album in enumerate(albums):
# Get album status and MusicBrainz ID
album_status = str(album.get('albumStatus', '')).lower()
musicbrainz_id = album.get('mbid') or album.get('musicbrainz_id') or album.get('musicbrainzId') or ''
album_title = album.get('name') or album.get('title') or 'Unknown'
album_issues = album.get('albumIssues', [])
# Debug: Print album details
print(f" [Sambl] Album {idx+1}: {album_title}")
print(f" Status: {album_status or 'N/A'}, MBID: {musicbrainz_id or 'None'}, Issues: {album_issues}")
# Extract Deezer URL and album info
deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
if not deezer_id or deezer_id == 'None':
print(f" ⚠️ Skipping - no valid Deezer ID found")
continue
deezer_url = f"https://www.deezer.com/album/{deezer_id}"
album_data = {
'title': album_title,
'deezer_url': deezer_url,
'deezer_id': deezer_id,
'release_date': album.get('releaseDate') or album.get('release_date') or album.get('release') or '',
'artist_name': artist_name,
'cover_url': album.get('imageUrl') or album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or ''
}
# Categorize albums based on status
if album_status == 'red' or not musicbrainz_id or musicbrainz_id == '':
# Red status or no MBID = needs to be added to MusicBrainz
albums_to_add.append(album_data)
print(f" ✓ Added to 'to add' list (not in MusicBrainz)")
elif album_status == 'orange':
# Orange status = in MusicBrainz but needs linking/updates
album_data['mbid'] = musicbrainz_id
album_data['mb_url'] = album.get('albumMBUrl', f'https://musicbrainz.org/release/{musicbrainz_id}')
album_data['album_issues'] = album_issues
albums_to_update.append(album_data)
print(f" ✓ Added to 'to update' list (needs linking/updates)")
else:
# Green status = properly linked, skip
print(f" ✓ Album is properly linked (MBID: {musicbrainz_id})")
print(f" [Sambl] ✓ Found {len(albums_to_add)} album(s) to add, {len(albums_to_update)} album(s) to update")
return albums_to_add, albums_to_update
except requests.exceptions.RequestException as e:
print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
return []
return [], []
except (KeyError, ValueError, TypeError) as e:
print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
print(f" [Sambl] Response: {response.text[:200] if 'response' in locals() else 'N/A'}", file=sys.stderr)
return []
return [], []
class SubmissionLinkGenerator:
@@ -279,7 +337,8 @@ def main():
print(f"Found {total_artists} monitored artists")
print("\n" + "="*80)
all_missing_albums = []
all_albums_to_add = []
all_albums_to_update = []
for artist in artists:
artist_name = artist.get('artistName', 'Unknown')
@@ -292,88 +351,129 @@ def main():
print(f"\n🎵 Artist: {artist_name}")
print(f" MusicBrainz ID: {artist_mbid}")
# Find missing albums using Sambl
missing_albums = sambl.find_missing_albums(artist_mbid, artist_name)
# Find albums using Sambl
albums_to_add, albums_to_update = sambl.find_missing_albums(artist_mbid, artist_name)
if missing_albums:
print(f" Found {len(missing_albums)} missing album(s):")
for album in missing_albums:
# Process albums to add
if albums_to_add:
print(f"\n 📥 Albums to ADD ({len(albums_to_add)}):")
for album in albums_to_add:
deezer_url = album.get('deezer_url')
if deezer_url:
links = SubmissionLinkGenerator.generate_links(deezer_url)
album['submission_links'] = links
all_missing_albums.append(album)
album['action'] = 'add'
all_albums_to_add.append(album)
print(f" 📀 {album.get('title', 'Unknown Title')}")
print(f" Deezer: {deezer_url}")
print(f" a-tisket: {links['atisket_link']}")
print(f" Harmony: {links['harmony_link']}")
else:
print(f" ✓ No missing albums found")
print(f" 📀 {album.get('title', 'Unknown Title')}")
print(f" Deezer: {deezer_url}")
print(f" a-tisket: {links['atisket_link']}")
print(f" Harmony: {links['harmony_link']}")
# Process albums to update
if albums_to_update:
print(f"\n 🔄 Albums to UPDATE ({len(albums_to_update)}):")
for album in albums_to_update:
deezer_url = album.get('deezer_url')
mb_url = album.get('mb_url', '')
issues = album.get('album_issues', [])
if deezer_url:
links = SubmissionLinkGenerator.generate_links(deezer_url)
album['submission_links'] = links
album['action'] = 'update'
all_albums_to_update.append(album)
print(f" 📀 {album.get('title', 'Unknown Title')}")
print(f" Deezer: {deezer_url}")
if mb_url:
print(f" MusicBrainz: {mb_url}")
if issues:
print(f" Issues: {', '.join(issues)}")
print(f" a-tisket: {links['atisket_link']}")
print(f" Harmony: {links['harmony_link']}")
if not albums_to_add and not albums_to_update:
print(f" ✓ All albums are properly linked!")
# Generate summary report
print("\n" + "="*80)
print(f"\n📊 Summary:")
print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS else ""))
print(f" Total missing albums found: {len(all_missing_albums)}")
print(f" Albums to ADD: {len(all_albums_to_add)}")
print(f" Albums to UPDATE: {len(all_albums_to_update)}")
# Save results to JSON file
if all_missing_albums:
all_albums = all_albums_to_add + all_albums_to_update
if all_albums:
output_file = "missing_albums.json"
with open(output_file, 'w', encoding='utf-8') as f:
json.dump(all_missing_albums, f, indent=2, ensure_ascii=False)
json.dump({
'albums_to_add': all_albums_to_add,
'albums_to_update': all_albums_to_update,
'summary': {
'total_to_add': len(all_albums_to_add),
'total_to_update': len(all_albums_to_update),
'total': len(all_albums)
}
}, f, indent=2, ensure_ascii=False)
print(f"\n💾 Results saved to {output_file}")
# Generate HTML report with clickable links
generate_html_report(all_missing_albums)
generate_html_report(all_albums_to_add, all_albums_to_update)
else:
print("\n✨ All albums are already on MusicBrainz!")
def generate_html_report(albums: List[Dict]):
def generate_html_report(albums_to_add: List[Dict], albums_to_update: List[Dict]):
"""Generate an HTML report with clickable submission links"""
html_content = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Missing Albums - MusicBrainz Submission Links</title>
<title>MusicBrainz Albums - Add & Update</title>
<style>
body {
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
max-width: 1200px;
margin: 0 auto;
padding: 20px;
background-color: #f5f5f5;
}
h1 {
}}
h1 {{
color: #333;
border-bottom: 3px solid #4CAF50;
padding-bottom: 10px;
}
.album {
}}
h2 {{
color: #2196F3;
margin-top: 30px;
border-bottom: 2px solid #2196F3;
padding-bottom: 5px;
}}
.album {{
background: white;
border-radius: 8px;
padding: 20px;
margin: 20px 0;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.album-title {
}}
.album-title {{
font-size: 1.5em;
font-weight: bold;
color: #2196F3;
margin-bottom: 10px;
}
.artist-name {
}}
.artist-name {{
color: #666;
margin-bottom: 15px;
}
.links {
}}
.links {{
display: flex;
gap: 10px;
flex-wrap: wrap;
}
.link-button {
}}
.link-button {{
display: inline-block;
padding: 10px 20px;
background-color: #4CAF50;
@@ -381,40 +481,51 @@ def generate_html_report(albums: List[Dict]):
text-decoration: none;
border-radius: 5px;
transition: background-color 0.3s;
}
.link-button:hover {
}}
.link-button:hover {{
background-color: #45a049;
}
.link-button.atisket {
}}
.link-button.atisket {{
background-color: #2196F3;
}
.link-button.atisket:hover {
}}
.link-button.atisket:hover {{
background-color: #0b7dda;
}
.link-button.harmony {
}}
.link-button.harmony {{
background-color: #FF9800;
}
.link-button.harmony:hover {
}}
.link-button.harmony:hover {{
background-color: #e68900;
}
.deezer-link {
}}
.deezer-link {{
color: #666;
font-size: 0.9em;
margin-top: 10px;
}
.summary {
}}
.mb-link {{
color: #666;
font-size: 0.9em;
margin-top: 5px;
}}
.issues {{
color: #FF9800;
font-size: 0.9em;
margin-top: 5px;
font-style: italic;
}}
.summary {{
background: white;
padding: 15px;
border-radius: 8px;
margin-bottom: 20px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
}}
</style>
</head>
<body>
<h1>🎵 Missing Albums - MusicBrainz Submission Links</h1>
<h1>🎵 MusicBrainz Albums - Add & Update</h1>
<div class="summary">
<strong>Total missing albums: {count}</strong>
<strong>Albums to ADD: {add_count}</strong> | <strong>Albums to UPDATE: {update_count}</strong>
</div>
"""
@@ -422,6 +533,8 @@ def generate_html_report(albums: List[Dict]):
<div class="album">
<div class="album-title">{title}</div>
<div class="artist-name">by {artist}</div>
{mb_info}
{issues_info}
<div class="links">
<a href="{atisket_link}" target="_blank" class="link-button atisket">Submit via a-tisket</a>
<a href="{harmony_link}" target="_blank" class="link-button harmony">Submit via Harmony</a>
@@ -432,18 +545,47 @@ def generate_html_report(albums: List[Dict]):
</div>
"""
albums_html = ""
for album in albums:
def format_album(album, is_update=False):
submission_links = album.get('submission_links', {})
albums_html += album_html.format(
mb_info = ""
issues_info = ""
if is_update:
mb_url = album.get('mb_url', '')
if mb_url:
mb_info = f'<div class="mb-link"><a href="{mb_url}" target="_blank">View on MusicBrainz</a></div>'
issues = album.get('album_issues', [])
if issues:
issues_info = f'<div class="issues">Issues: {", ".join(issues)}</div>'
return album_html.format(
title=album.get('title', 'Unknown Title'),
artist=album.get('artist_name', 'Unknown Artist'),
mb_info=mb_info,
issues_info=issues_info,
atisket_link=submission_links.get('atisket_link', '#'),
harmony_link=submission_links.get('harmony_link', '#'),
deezer_url=submission_links.get('deezer_url', '#')
)
html_content = html_content.format(count=len(albums)) + albums_html + """
albums_html = ""
# Albums to ADD section
if albums_to_add:
albums_html += '<h2>📥 Albums to ADD (Not in MusicBrainz)</h2>'
for album in albums_to_add:
albums_html += format_album(album, is_update=False)
# Albums to UPDATE section
if albums_to_update:
albums_html += '<h2>🔄 Albums to UPDATE (Need Linking/Updates)</h2>'
for album in albums_to_update:
albums_html += format_album(album, is_update=True)
html_content = html_content.format(
add_count=len(albums_to_add),
update_count=len(albums_to_update)
) + albums_html + """
</body>
</html>
"""