Refactor find_missing_albums to return a tuple of album lists and enhance album processing
- Updated find_missing_albums to return a tuple of two lists: (albums_to_add, albums_to_update).
- Improved handling of API responses and added debugging output for their structure.
- Enhanced album categorization based on status indicators (red for missing, orange for needing updates).
- Updated the main function to process and display albums to add and albums to update separately.
- Modified HTML report generation to reflect the new album categorization.
This commit is contained in:
344
main.py
344
main.py
@@ -13,7 +13,7 @@ import requests
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
from typing import List, Dict, Optional
|
||||
from typing import List, Dict, Optional, Tuple
|
||||
from urllib.parse import quote
|
||||
from dotenv import load_dotenv
|
||||
|
||||
@@ -105,7 +105,7 @@ class SamblClient:
|
||||
print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]:
|
||||
def find_missing_albums(self, artist_mbid: str, artist_name: str) -> Tuple[List[Dict], List[Dict]]:
|
||||
"""
|
||||
Find albums missing on MusicBrainz from Deezer releases for an artist.
|
||||
|
||||
@@ -117,7 +117,9 @@ class SamblClient:
|
||||
artist_name: Name of the artist
|
||||
|
||||
Returns:
|
||||
List of album dictionaries with Deezer URLs and metadata
|
||||
Tuple of (albums_to_add, albums_to_update)
|
||||
- albums_to_add: Albums not in MusicBrainz (red status, no mbid)
|
||||
- albums_to_update: Albums in MusicBrainz but need linking/updates (orange status)
|
||||
Format:
|
||||
[
|
||||
{
|
||||
@@ -125,7 +127,9 @@ class SamblClient:
|
||||
'deezer_url': 'https://www.deezer.com/album/123456789',
|
||||
'deezer_id': '123456789',
|
||||
'release_date': '2024-01-01',
|
||||
'artist_name': artist_name
|
||||
'artist_name': artist_name,
|
||||
'mbid': 'musicbrainz-id' (only for albums_to_update),
|
||||
'album_issues': ['issue1', 'issue2'] (only for albums_to_update)
|
||||
}
|
||||
]
|
||||
"""
|
||||
@@ -151,69 +155,123 @@ class SamblClient:
|
||||
|
||||
data = response.json()
|
||||
|
||||
# Parse the response to extract missing albums
|
||||
# The response structure may vary, so we'll handle different formats
|
||||
missing_albums = []
|
||||
# Debug: Print the raw response structure
|
||||
print(f" [Sambl] Raw API response structure:")
|
||||
print(f" [Sambl] Response type: {type(data)}")
|
||||
if isinstance(data, dict):
|
||||
print(f" [Sambl] Top-level keys: {list(data.keys())}")
|
||||
if 'albumData' in data:
|
||||
album_data = data.get('albumData', [])
|
||||
print(f" [Sambl] albumData count: {len(album_data)}")
|
||||
if len(album_data) > 0:
|
||||
print(f" [Sambl] First album keys: {list(album_data[0].keys()) if isinstance(album_data[0], dict) else 'Not a dict'}")
|
||||
print(f" [Sambl] First album sample: {json.dumps(album_data[0], indent=2)[:500] if isinstance(album_data[0], dict) else str(album_data[0])[:500]}")
|
||||
# Check status counts
|
||||
if 'orange' in data:
|
||||
print(f" [Sambl] Orange (missing) albums: {data.get('orange', 0)}")
|
||||
if 'green' in data:
|
||||
print(f" [Sambl] Green (linked) albums: {data.get('green', 0)}")
|
||||
if 'red' in data:
|
||||
print(f" [Sambl] Red albums: {data.get('red', 0)}")
|
||||
elif isinstance(data, list):
|
||||
print(f" [Sambl] Response is a list with {len(data)} items")
|
||||
if len(data) > 0:
|
||||
print(f" [Sambl] First item keys: {list(data[0].keys()) if isinstance(data[0], dict) else 'Not a dict'}")
|
||||
print(f" [Sambl] First item sample: {json.dumps(data[0], indent=2)[:500] if isinstance(data[0], dict) else str(data[0])[:500]}")
|
||||
|
||||
# Parse the response to extract albums
|
||||
# SAMBL returns albums in 'albumData' with status indicators:
|
||||
# - 'red': Not in MusicBrainz (need to add)
|
||||
# - 'orange': In MusicBrainz but needs linking/updates (need to update)
|
||||
# - 'green': Properly linked (skip)
|
||||
albums_to_add = []
|
||||
albums_to_update = []
|
||||
|
||||
# SAMBL typically returns albums with status indicators
|
||||
# Missing albums are usually marked as not found in MusicBrainz
|
||||
albums = []
|
||||
if isinstance(data, dict):
|
||||
# Check for common response structures
|
||||
albums = data.get('albums', [])
|
||||
# SAMBL uses 'albumData' as the key for the albums array
|
||||
album_data = data.get('albumData')
|
||||
print(f" [Sambl] albumData type: {type(album_data)}, value: {album_data}")
|
||||
|
||||
if isinstance(album_data, list):
|
||||
albums = album_data
|
||||
elif isinstance(album_data, dict):
|
||||
# albumData might be a dict with nested structure
|
||||
print(f" [Sambl] albumData is dict with keys: {list(album_data.keys()) if album_data else 'None'}")
|
||||
albums = album_data.get('albums', album_data.get('data', []))
|
||||
|
||||
# Fallback to other possible keys
|
||||
if not albums and isinstance(data.get('albums'), list):
|
||||
albums = data.get('albums', [])
|
||||
if not albums and isinstance(data.get('data'), list):
|
||||
albums = data.get('data', [])
|
||||
elif isinstance(data, list):
|
||||
albums = data
|
||||
|
||||
for album in albums:
|
||||
# Look for albums that are missing from MusicBrainz
|
||||
# SAMBL typically marks these with status like 'missing', 'not_found', etc.
|
||||
status = str(album.get('status', '')).lower()
|
||||
mb_status = str(album.get('musicbrainz_status', '')).lower()
|
||||
print(f" [Sambl] Processing {len(albums)} album(s) from response")
|
||||
|
||||
# Check if album is missing (not linked to MusicBrainz)
|
||||
# SAMBL marks missing albums with various indicators
|
||||
is_missing = (
|
||||
'missing' in status or
|
||||
'not_found' in status or
|
||||
'not_linked' in status or
|
||||
'orange' in status or # SAMBL uses orange status for albums not linked
|
||||
album.get('musicbrainz_id') is None or
|
||||
album.get('musicbrainz_id') == '' or
|
||||
album.get('mbid') is None or
|
||||
album.get('mbid') == ''
|
||||
)
|
||||
# If we have status counts but no albums, something is wrong
|
||||
if isinstance(data, dict) and len(albums) == 0:
|
||||
print(f" [Sambl] ⚠️ Warning: Found status counts but no albums in albumData")
|
||||
print(f" [Sambl] Full response keys: {list(data.keys())}")
|
||||
print(f" [Sambl] Total albums reported: {data.get('total', 'N/A')}")
|
||||
# Try to print a sample of the response structure
|
||||
print(f" [Sambl] Response sample: {json.dumps(data, indent=2)[:1000]}")
|
||||
|
||||
if is_missing:
|
||||
# Extract Deezer URL and album info
|
||||
deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
|
||||
if deezer_id and deezer_id != 'None':
|
||||
deezer_url = f"https://www.deezer.com/album/{deezer_id}"
|
||||
for idx, album in enumerate(albums):
|
||||
# Get album status and MusicBrainz ID
|
||||
album_status = str(album.get('albumStatus', '')).lower()
|
||||
musicbrainz_id = album.get('mbid') or album.get('musicbrainz_id') or album.get('musicbrainzId') or ''
|
||||
album_title = album.get('name') or album.get('title') or 'Unknown'
|
||||
album_issues = album.get('albumIssues', [])
|
||||
|
||||
missing_albums.append({
|
||||
'title': album.get('title') or album.get('name') or 'Unknown Title',
|
||||
'deezer_url': deezer_url,
|
||||
'deezer_id': deezer_id,
|
||||
'release_date': album.get('release_date') or album.get('releaseDate') or album.get('release') or '',
|
||||
'artist_name': artist_name,
|
||||
'cover_url': album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or album.get('cover_medium') or ''
|
||||
})
|
||||
# Debug: Print album details
|
||||
print(f" [Sambl] Album {idx+1}: {album_title}")
|
||||
print(f" Status: {album_status or 'N/A'}, MBID: {musicbrainz_id or 'None'}, Issues: {album_issues}")
|
||||
|
||||
if missing_albums:
|
||||
print(f" [Sambl] ✓ Found {len(missing_albums)} missing album(s)")
|
||||
else:
|
||||
print(f" [Sambl] ✓ No missing albums found")
|
||||
# Extract Deezer URL and album info
|
||||
deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
|
||||
if not deezer_id or deezer_id == 'None':
|
||||
print(f" ⚠️ Skipping - no valid Deezer ID found")
|
||||
continue
|
||||
|
||||
return missing_albums
|
||||
deezer_url = f"https://www.deezer.com/album/{deezer_id}"
|
||||
album_data = {
|
||||
'title': album_title,
|
||||
'deezer_url': deezer_url,
|
||||
'deezer_id': deezer_id,
|
||||
'release_date': album.get('releaseDate') or album.get('release_date') or album.get('release') or '',
|
||||
'artist_name': artist_name,
|
||||
'cover_url': album.get('imageUrl') or album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or ''
|
||||
}
|
||||
|
||||
# Categorize albums based on status
|
||||
if album_status == 'red' or not musicbrainz_id or musicbrainz_id == '':
|
||||
# Red status or no MBID = needs to be added to MusicBrainz
|
||||
albums_to_add.append(album_data)
|
||||
print(f" ✓ Added to 'to add' list (not in MusicBrainz)")
|
||||
elif album_status == 'orange':
|
||||
# Orange status = in MusicBrainz but needs linking/updates
|
||||
album_data['mbid'] = musicbrainz_id
|
||||
album_data['mb_url'] = album.get('albumMBUrl', f'https://musicbrainz.org/release/{musicbrainz_id}')
|
||||
album_data['album_issues'] = album_issues
|
||||
albums_to_update.append(album_data)
|
||||
print(f" ✓ Added to 'to update' list (needs linking/updates)")
|
||||
else:
|
||||
# Green status = properly linked, skip
|
||||
print(f" ✓ Album is properly linked (MBID: {musicbrainz_id})")
|
||||
|
||||
print(f" [Sambl] ✓ Found {len(albums_to_add)} album(s) to add, {len(albums_to_update)} album(s) to update")
|
||||
|
||||
return albums_to_add, albums_to_update
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
|
||||
return []
|
||||
return [], []
|
||||
except (KeyError, ValueError, TypeError) as e:
|
||||
print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
|
||||
print(f" [Sambl] Response: {response.text[:200] if 'response' in locals() else 'N/A'}", file=sys.stderr)
|
||||
return []
|
||||
return [], []
|
||||
|
||||
|
||||
class SubmissionLinkGenerator:
|
||||
@@ -279,7 +337,8 @@ def main():
|
||||
print(f"Found {total_artists} monitored artists")
|
||||
print("\n" + "="*80)
|
||||
|
||||
all_missing_albums = []
|
||||
all_albums_to_add = []
|
||||
all_albums_to_update = []
|
||||
|
||||
for artist in artists:
|
||||
artist_name = artist.get('artistName', 'Unknown')
|
||||
@@ -292,88 +351,129 @@ def main():
|
||||
print(f"\n🎵 Artist: {artist_name}")
|
||||
print(f" MusicBrainz ID: {artist_mbid}")
|
||||
|
||||
# Find missing albums using Sambl
|
||||
missing_albums = sambl.find_missing_albums(artist_mbid, artist_name)
|
||||
# Find albums using Sambl
|
||||
albums_to_add, albums_to_update = sambl.find_missing_albums(artist_mbid, artist_name)
|
||||
|
||||
if missing_albums:
|
||||
print(f" Found {len(missing_albums)} missing album(s):")
|
||||
for album in missing_albums:
|
||||
# Process albums to add
|
||||
if albums_to_add:
|
||||
print(f"\n 📥 Albums to ADD ({len(albums_to_add)}):")
|
||||
for album in albums_to_add:
|
||||
deezer_url = album.get('deezer_url')
|
||||
if deezer_url:
|
||||
links = SubmissionLinkGenerator.generate_links(deezer_url)
|
||||
album['submission_links'] = links
|
||||
all_missing_albums.append(album)
|
||||
album['action'] = 'add'
|
||||
all_albums_to_add.append(album)
|
||||
|
||||
print(f" 📀 {album.get('title', 'Unknown Title')}")
|
||||
print(f" Deezer: {deezer_url}")
|
||||
print(f" a-tisket: {links['atisket_link']}")
|
||||
print(f" Harmony: {links['harmony_link']}")
|
||||
else:
|
||||
print(f" ✓ No missing albums found")
|
||||
print(f" 📀 {album.get('title', 'Unknown Title')}")
|
||||
print(f" Deezer: {deezer_url}")
|
||||
print(f" a-tisket: {links['atisket_link']}")
|
||||
print(f" Harmony: {links['harmony_link']}")
|
||||
|
||||
# Process albums to update
|
||||
if albums_to_update:
|
||||
print(f"\n 🔄 Albums to UPDATE ({len(albums_to_update)}):")
|
||||
for album in albums_to_update:
|
||||
deezer_url = album.get('deezer_url')
|
||||
mb_url = album.get('mb_url', '')
|
||||
issues = album.get('album_issues', [])
|
||||
if deezer_url:
|
||||
links = SubmissionLinkGenerator.generate_links(deezer_url)
|
||||
album['submission_links'] = links
|
||||
album['action'] = 'update'
|
||||
all_albums_to_update.append(album)
|
||||
|
||||
print(f" 📀 {album.get('title', 'Unknown Title')}")
|
||||
print(f" Deezer: {deezer_url}")
|
||||
if mb_url:
|
||||
print(f" MusicBrainz: {mb_url}")
|
||||
if issues:
|
||||
print(f" Issues: {', '.join(issues)}")
|
||||
print(f" a-tisket: {links['atisket_link']}")
|
||||
print(f" Harmony: {links['harmony_link']}")
|
||||
|
||||
if not albums_to_add and not albums_to_update:
|
||||
print(f" ✓ All albums are properly linked!")
|
||||
|
||||
# Generate summary report
|
||||
print("\n" + "="*80)
|
||||
print(f"\n📊 Summary:")
|
||||
print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS else ""))
|
||||
print(f" Total missing albums found: {len(all_missing_albums)}")
|
||||
print(f" Albums to ADD: {len(all_albums_to_add)}")
|
||||
print(f" Albums to UPDATE: {len(all_albums_to_update)}")
|
||||
|
||||
# Save results to JSON file
|
||||
if all_missing_albums:
|
||||
all_albums = all_albums_to_add + all_albums_to_update
|
||||
if all_albums:
|
||||
output_file = "missing_albums.json"
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
json.dump(all_missing_albums, f, indent=2, ensure_ascii=False)
|
||||
json.dump({
|
||||
'albums_to_add': all_albums_to_add,
|
||||
'albums_to_update': all_albums_to_update,
|
||||
'summary': {
|
||||
'total_to_add': len(all_albums_to_add),
|
||||
'total_to_update': len(all_albums_to_update),
|
||||
'total': len(all_albums)
|
||||
}
|
||||
}, f, indent=2, ensure_ascii=False)
|
||||
print(f"\n💾 Results saved to {output_file}")
|
||||
|
||||
# Generate HTML report with clickable links
|
||||
generate_html_report(all_missing_albums)
|
||||
generate_html_report(all_albums_to_add, all_albums_to_update)
|
||||
else:
|
||||
print("\n✨ All albums are already on MusicBrainz!")
|
||||
|
||||
|
||||
def generate_html_report(albums: List[Dict]):
|
||||
def generate_html_report(albums_to_add: List[Dict], albums_to_update: List[Dict]):
|
||||
"""Generate an HTML report with clickable submission links"""
|
||||
html_content = """<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Missing Albums - MusicBrainz Submission Links</title>
|
||||
<title>MusicBrainz Albums - Add & Update</title>
|
||||
<style>
|
||||
body {
|
||||
body {{
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
padding: 20px;
|
||||
background-color: #f5f5f5;
|
||||
}
|
||||
h1 {
|
||||
}}
|
||||
h1 {{
|
||||
color: #333;
|
||||
border-bottom: 3px solid #4CAF50;
|
||||
padding-bottom: 10px;
|
||||
}
|
||||
.album {
|
||||
}}
|
||||
h2 {{
|
||||
color: #2196F3;
|
||||
margin-top: 30px;
|
||||
border-bottom: 2px solid #2196F3;
|
||||
padding-bottom: 5px;
|
||||
}}
|
||||
.album {{
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
padding: 20px;
|
||||
margin: 20px 0;
|
||||
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||
}
|
||||
.album-title {
|
||||
}}
|
||||
.album-title {{
|
||||
font-size: 1.5em;
|
||||
font-weight: bold;
|
||||
color: #2196F3;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
.artist-name {
|
||||
}}
|
||||
.artist-name {{
|
||||
color: #666;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
.links {
|
||||
}}
|
||||
.links {{
|
||||
display: flex;
|
||||
gap: 10px;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
.link-button {
|
||||
}}
|
||||
.link-button {{
|
||||
display: inline-block;
|
||||
padding: 10px 20px;
|
||||
background-color: #4CAF50;
|
||||
@@ -381,40 +481,51 @@ def generate_html_report(albums: List[Dict]):
|
||||
text-decoration: none;
|
||||
border-radius: 5px;
|
||||
transition: background-color 0.3s;
|
||||
}
|
||||
.link-button:hover {
|
||||
}}
|
||||
.link-button:hover {{
|
||||
background-color: #45a049;
|
||||
}
|
||||
.link-button.atisket {
|
||||
}}
|
||||
.link-button.atisket {{
|
||||
background-color: #2196F3;
|
||||
}
|
||||
.link-button.atisket:hover {
|
||||
}}
|
||||
.link-button.atisket:hover {{
|
||||
background-color: #0b7dda;
|
||||
}
|
||||
.link-button.harmony {
|
||||
}}
|
||||
.link-button.harmony {{
|
||||
background-color: #FF9800;
|
||||
}
|
||||
.link-button.harmony:hover {
|
||||
}}
|
||||
.link-button.harmony:hover {{
|
||||
background-color: #e68900;
|
||||
}
|
||||
.deezer-link {
|
||||
}}
|
||||
.deezer-link {{
|
||||
color: #666;
|
||||
font-size: 0.9em;
|
||||
margin-top: 10px;
|
||||
}
|
||||
.summary {
|
||||
}}
|
||||
.mb-link {{
|
||||
color: #666;
|
||||
font-size: 0.9em;
|
||||
margin-top: 5px;
|
||||
}}
|
||||
.issues {{
|
||||
color: #FF9800;
|
||||
font-size: 0.9em;
|
||||
margin-top: 5px;
|
||||
font-style: italic;
|
||||
}}
|
||||
.summary {{
|
||||
background: white;
|
||||
padding: 15px;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 20px;
|
||||
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||
}
|
||||
}}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h1>🎵 Missing Albums - MusicBrainz Submission Links</h1>
|
||||
<h1>🎵 MusicBrainz Albums - Add & Update</h1>
|
||||
<div class="summary">
|
||||
<strong>Total missing albums: {count}</strong>
|
||||
<strong>Albums to ADD: {add_count}</strong> | <strong>Albums to UPDATE: {update_count}</strong>
|
||||
</div>
|
||||
"""
|
||||
|
||||
@@ -422,6 +533,8 @@ def generate_html_report(albums: List[Dict]):
|
||||
<div class="album">
|
||||
<div class="album-title">{title}</div>
|
||||
<div class="artist-name">by {artist}</div>
|
||||
{mb_info}
|
||||
{issues_info}
|
||||
<div class="links">
|
||||
<a href="{atisket_link}" target="_blank" class="link-button atisket">Submit via a-tisket</a>
|
||||
<a href="{harmony_link}" target="_blank" class="link-button harmony">Submit via Harmony</a>
|
||||
@@ -432,18 +545,47 @@ def generate_html_report(albums: List[Dict]):
|
||||
</div>
|
||||
"""
|
||||
|
||||
albums_html = ""
|
||||
for album in albums:
|
||||
def format_album(album, is_update=False):
|
||||
submission_links = album.get('submission_links', {})
|
||||
albums_html += album_html.format(
|
||||
mb_info = ""
|
||||
issues_info = ""
|
||||
|
||||
if is_update:
|
||||
mb_url = album.get('mb_url', '')
|
||||
if mb_url:
|
||||
mb_info = f'<div class="mb-link"><a href="{mb_url}" target="_blank">View on MusicBrainz</a></div>'
|
||||
issues = album.get('album_issues', [])
|
||||
if issues:
|
||||
issues_info = f'<div class="issues">Issues: {", ".join(issues)}</div>'
|
||||
|
||||
return album_html.format(
|
||||
title=album.get('title', 'Unknown Title'),
|
||||
artist=album.get('artist_name', 'Unknown Artist'),
|
||||
mb_info=mb_info,
|
||||
issues_info=issues_info,
|
||||
atisket_link=submission_links.get('atisket_link', '#'),
|
||||
harmony_link=submission_links.get('harmony_link', '#'),
|
||||
deezer_url=submission_links.get('deezer_url', '#')
|
||||
)
|
||||
|
||||
html_content = html_content.format(count=len(albums)) + albums_html + """
|
||||
albums_html = ""
|
||||
|
||||
# Albums to ADD section
|
||||
if albums_to_add:
|
||||
albums_html += '<h2>📥 Albums to ADD (Not in MusicBrainz)</h2>'
|
||||
for album in albums_to_add:
|
||||
albums_html += format_album(album, is_update=False)
|
||||
|
||||
# Albums to UPDATE section
|
||||
if albums_to_update:
|
||||
albums_html += '<h2>🔄 Albums to UPDATE (Need Linking/Updates)</h2>'
|
||||
for album in albums_to_update:
|
||||
albums_html += format_album(album, is_update=True)
|
||||
|
||||
html_content = html_content.format(
|
||||
add_count=len(albums_to_add),
|
||||
update_count=len(albums_to_update)
|
||||
) + albums_html + """
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user