Enhance development environment and improve main.py functionality

- Added `black` to the development environment in flake.nix for code formatting.
- Updated shell hook to include instructions for using `black`.
- Refactored `main.py` to improve code organization and readability, including reordering imports and simplifying list comprehensions.
- Enhanced album processing functions for better clarity and efficiency.
- Improved error handling and output formatting for better user experience.
This commit is contained in:
Danilo Reyes
2025-11-11 10:13:59 -06:00
parent 0dca7474a9
commit da03de4b3c
2 changed files with 290 additions and 367 deletions

View File

@@ -17,10 +17,14 @@
in in
{ {
devShells.default = pkgs.mkShell { devShells.default = pkgs.mkShell {
buildInputs = [ pythonEnv ]; buildInputs = [
pythonEnv
pkgs.black
];
shellHook = '' shellHook = ''
echo "Python environment ready!" echo "Python environment ready!"
echo "Run: python main.py" echo "Run: python main.py"
echo "Format code with: black main.py"
''; '';
}; };

555
main.py
View File

@@ -2,22 +2,17 @@
""" """
Script to identify missing albums on MusicBrainz from Deezer releases Script to identify missing albums on MusicBrainz from Deezer releases
for artists monitored in Lidarr, and generate submission links. for artists monitored in Lidarr, and generate submission links.
This script:
1. Fetches artists from Lidarr with monitorNewItems set to "new" or "all"
2. Uses SAMBL to find albums missing on MusicBrainz from Deezer releases
3. Generates a-tisket/harmony links for submitting albums to MusicBrainz
""" """
import requests
import json import json
import sys
import os import os
from typing import List, Dict, Optional, Tuple import sys
from typing import Dict, List, Optional, Tuple
from urllib.parse import quote from urllib.parse import quote
import requests
from dotenv import load_dotenv from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv() load_dotenv()
@@ -25,9 +20,8 @@ class LidarrClient:
"""Client for interacting with Lidarr API""" """Client for interacting with Lidarr API"""
def __init__(self, base_url: str, api_key: str): def __init__(self, base_url: str, api_key: str):
self.base_url = base_url.rstrip('/') self.base_url = base_url.rstrip("/")
self.api_key = api_key self.headers = {"X-Api-Key": api_key}
self.headers = {'X-Api-Key': api_key}
def get_artists(self) -> List[Dict]: def get_artists(self) -> List[Dict]:
"""Fetch all artists from Lidarr""" """Fetch all artists from Lidarr"""
@@ -40,237 +34,160 @@ class LidarrClient:
print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr) print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr)
return [] return []
def get_monitored_artists(self, monitor_types: List[str] = None) -> List[Dict]: def get_monitored_artists(
""" self, monitor_types: Optional[List[str]] = None
Get artists with monitorNewItems set to specified values ) -> List[Dict]:
"""Get artists with monitorNewItems set to specified values"""
Args:
monitor_types: List of monitorNewItems values to filter by (default: ['new', 'all'])
Returns:
List of artist dictionaries matching the criteria
"""
if monitor_types is None: if monitor_types is None:
monitor_types = ['new', 'all'] monitor_types = ["new", "all"]
return list(
all_artists = self.get_artists() filter(
filtered = [ lambda artist: artist.get("monitorNewItems") in monitor_types,
artist for artist in all_artists self.get_artists(),
if artist.get('monitorNewItems') in monitor_types )
] )
return filtered
class SamblClient: class SamblClient:
""" """Client for interacting with SAMBL API to find missing albums"""
Client for interacting with SAMBL API to find missing albums.
SAMBL (Streaming Artist MusicBrainz Lookup) is available at: def __init__(self, base_url: Optional[str] = None):
- Website: https://sambl.lioncat6.com self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip("/")
- GitHub: https://github.com/Lioncat6/SAMBL-React
- API Root: https://sambl.lioncat6.com/api/
"""
def __init__(self, base_url: str = None):
# Default to the public SAMBL instance if no URL provided
self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip('/')
def _search_deezer_artist(self, artist_name: str) -> Optional[str]: def _search_deezer_artist(self, artist_name: str) -> Optional[str]:
""" """Search for an artist on Deezer and return their Deezer ID"""
Search for an artist on Deezer and return their Deezer ID.
Uses Deezer API directly since SAMBL's searchArtists endpoint is unsupported.
Args:
artist_name: Name of the artist to search for
Returns:
Deezer artist ID as string, or None if not found
"""
try: try:
# Use Deezer API to search for the artist response = requests.get(
deezer_search_url = "https://api.deezer.com/search/artist" "https://api.deezer.com/search/artist",
params = {'q': artist_name, 'limit': 1} params={"q": artist_name, "limit": 1},
response = requests.get(deezer_search_url, params=params, timeout=10) timeout=10,
)
response.raise_for_status() response.raise_for_status()
data = response.json() data = response.json()
if data.get('data') and len(data['data']) > 0: if data.get("data") and len(data["data"]) > 0:
artist_id = str(data['data'][0]['id']) return str(data["data"][0]["id"])
print(f" [Sambl] Found Deezer artist ID: {artist_id}") return None
return artist_id except requests.exceptions.RequestException:
else:
print(f" [Sambl] ⚠️ Artist '{artist_name}' not found on Deezer")
return None
except requests.exceptions.RequestException as e:
print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
return None return None
def find_missing_albums(self, artist_mbid: str, artist_name: str) -> Tuple[List[Dict], List[Dict]]: def _extract_albums(self, data: Dict) -> List[Dict]:
""" """Extract albums list from SAMBL response"""
Find albums missing on MusicBrainz from Deezer releases for an artist. if isinstance(data, dict):
album_data = data.get("albumData")
if isinstance(album_data, list):
return album_data
if isinstance(album_data, dict):
return album_data.get("albums", album_data.get("data", []))
if isinstance(data.get("albums"), list):
return data.get("albums", [])
if isinstance(data.get("data"), list):
return data.get("data", [])
if isinstance(data, list):
return data
return []
Uses SAMBL's /api/compareArtistAlbums endpoint which compares albums def _build_album_data(self, album: Dict, artist_name: str) -> Optional[Dict]:
from Deezer with MusicBrainz and identifies missing ones. """Build album data dictionary from SAMBL album response"""
deezer_id = str(
album.get("id") or album.get("deezer_id") or album.get("deezerId") or ""
)
if not deezer_id or deezer_id == "None":
return None
Args: return {
artist_mbid: MusicBrainz ID of the artist "title": album.get("name") or album.get("title") or "Unknown",
artist_name: Name of the artist "deezer_url": f"https://www.deezer.com/album/{deezer_id}",
"deezer_id": deezer_id,
"release_date": album.get("releaseDate")
or album.get("release_date")
or album.get("release")
or "",
"artist_name": artist_name,
"cover_url": album.get("imageUrl")
or album.get("cover")
or album.get("cover_medium")
or album.get("coverUrl")
or "",
}
Returns: def _has_valid_deezer_id(self, album: Dict) -> bool:
Tuple of (albums_to_add, albums_to_update) """Check if album has a valid Deezer ID"""
- albums_to_add: Albums not in MusicBrainz (red status, no mbid) deezer_id = str(
- albums_to_update: Albums in MusicBrainz but need linking/updates (orange status) album.get("id") or album.get("deezer_id") or album.get("deezerId") or ""
Format: )
[ return bool(deezer_id and deezer_id != "None")
{
'title': 'Album Title',
'deezer_url': 'https://www.deezer.com/album/123456789',
'deezer_id': '123456789',
'release_date': '2024-01-01',
'artist_name': artist_name,
'mbid': 'musicbrainz-id' (only for albums_to_update),
'album_issues': ['issue1', 'issue2'] (only for albums_to_update)
}
]
"""
print(f" [Sambl] Checking for missing albums for {artist_name} (MBID: {artist_mbid})")
# First, we need to find the Deezer artist ID def _get_album_status(self, album: Dict) -> str:
"""Get album status from SAMBL response"""
return str(album.get("albumStatus", "")).lower()
def _get_mbid(self, album: Dict) -> str:
"""Extract MusicBrainz ID from album"""
return (
album.get("mbid")
or album.get("musicbrainz_id")
or album.get("musicbrainzId")
or ""
)
def _is_album_to_add(self, album: Dict) -> bool:
"""Check if album needs to be added to MusicBrainz"""
status = self._get_album_status(album)
mbid = self._get_mbid(album)
return status == "red" or not mbid
def _is_album_to_update(self, album: Dict) -> bool:
"""Check if album needs to be updated in MusicBrainz"""
return self._get_album_status(album) == "orange"
def _enrich_update_album(self, album_data: Dict, album: Dict) -> Dict:
"""Enrich album data with MusicBrainz information for updates"""
musicbrainz_id = self._get_mbid(album)
album_data["mbid"] = musicbrainz_id
album_data["mb_url"] = album.get(
"albumMBUrl", f"https://musicbrainz.org/release/{musicbrainz_id}"
)
album_data["album_issues"] = album.get("albumIssues", [])
return album_data
def find_missing_albums(
self, artist_mbid: str, artist_name: str
) -> Tuple[List[Dict], List[Dict]]:
"""Find albums missing on MusicBrainz from Deezer releases for an artist"""
deezer_artist_id = self._search_deezer_artist(artist_name) deezer_artist_id = self._search_deezer_artist(artist_name)
if not deezer_artist_id: if not deezer_artist_id:
return [] return [], []
# Now use SAMBL's compareArtistAlbums endpoint
try: try:
api_url = f"{self.base_url}/api/compareArtistAlbums"
params = { params = {
'provider_id': deezer_artist_id, "provider_id": deezer_artist_id,
'provider': 'deezer', "provider": "deezer",
'mbid': artist_mbid, "mbid": artist_mbid,
'full': 'true' # Get full information including missing albums "full": "true",
} }
response = requests.get(
response = requests.get(api_url, params=params, timeout=30) f"{self.base_url}/api/compareArtistAlbums", params=params, timeout=30
)
response.raise_for_status() response.raise_for_status()
albums = self._extract_albums(response.json())
data = response.json() valid_albums = list(filter(self._has_valid_deezer_id, albums))
albums_to_add_raw = list(filter(self._is_album_to_add, valid_albums))
albums_to_update_raw = list(filter(self._is_album_to_update, valid_albums))
# Debug: Print the raw response structure build_album = lambda album: self._build_album_data(album, artist_name)
print(f" [Sambl] Raw API response structure:") albums_to_add = list(map(build_album, albums_to_add_raw))
print(f" [Sambl] Response type: {type(data)}")
if isinstance(data, dict):
print(f" [Sambl] Top-level keys: {list(data.keys())}")
if 'albumData' in data:
album_data = data.get('albumData', [])
print(f" [Sambl] albumData count: {len(album_data)}")
if len(album_data) > 0:
print(f" [Sambl] First album keys: {list(album_data[0].keys()) if isinstance(album_data[0], dict) else 'Not a dict'}")
print(f" [Sambl] First album sample: {json.dumps(album_data[0], indent=2)[:500] if isinstance(album_data[0], dict) else str(album_data[0])[:500]}")
# Check status counts
if 'orange' in data:
print(f" [Sambl] Orange (missing) albums: {data.get('orange', 0)}")
if 'green' in data:
print(f" [Sambl] Green (linked) albums: {data.get('green', 0)}")
if 'red' in data:
print(f" [Sambl] Red albums: {data.get('red', 0)}")
elif isinstance(data, list):
print(f" [Sambl] Response is a list with {len(data)} items")
if len(data) > 0:
print(f" [Sambl] First item keys: {list(data[0].keys()) if isinstance(data[0], dict) else 'Not a dict'}")
print(f" [Sambl] First item sample: {json.dumps(data[0], indent=2)[:500] if isinstance(data[0], dict) else str(data[0])[:500]}")
# Parse the response to extract albums enrich_album = lambda album: self._enrich_update_album(
# SAMBL returns albums in 'albumData' with status indicators: self._build_album_data(album, artist_name), album
# - 'red': Not in MusicBrainz (need to add) )
# - 'orange': In MusicBrainz but needs linking/updates (need to update) albums_to_update = list(map(enrich_album, albums_to_update_raw))
# - 'green': Properly linked (skip)
albums_to_add = []
albums_to_update = []
albums = []
if isinstance(data, dict):
# SAMBL uses 'albumData' as the key for the albums array
album_data = data.get('albumData')
print(f" [Sambl] albumData type: {type(album_data)}, value: {album_data}")
if isinstance(album_data, list):
albums = album_data
elif isinstance(album_data, dict):
# albumData might be a dict with nested structure
print(f" [Sambl] albumData is dict with keys: {list(album_data.keys()) if album_data else 'None'}")
albums = album_data.get('albums', album_data.get('data', []))
# Fallback to other possible keys
if not albums and isinstance(data.get('albums'), list):
albums = data.get('albums', [])
if not albums and isinstance(data.get('data'), list):
albums = data.get('data', [])
elif isinstance(data, list):
albums = data
print(f" [Sambl] Processing {len(albums)} album(s) from response")
# If we have status counts but no albums, something is wrong
if isinstance(data, dict) and len(albums) == 0:
print(f" [Sambl] ⚠️ Warning: Found status counts but no albums in albumData")
print(f" [Sambl] Full response keys: {list(data.keys())}")
print(f" [Sambl] Total albums reported: {data.get('total', 'N/A')}")
# Try to print a sample of the response structure
print(f" [Sambl] Response sample: {json.dumps(data, indent=2)[:1000]}")
for idx, album in enumerate(albums):
# Get album status and MusicBrainz ID
album_status = str(album.get('albumStatus', '')).lower()
musicbrainz_id = album.get('mbid') or album.get('musicbrainz_id') or album.get('musicbrainzId') or ''
album_title = album.get('name') or album.get('title') or 'Unknown'
album_issues = album.get('albumIssues', [])
# Debug: Print album details
print(f" [Sambl] Album {idx+1}: {album_title}")
print(f" Status: {album_status or 'N/A'}, MBID: {musicbrainz_id or 'None'}, Issues: {album_issues}")
# Extract Deezer URL and album info
deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
if not deezer_id or deezer_id == 'None':
print(f" ⚠️ Skipping - no valid Deezer ID found")
continue
deezer_url = f"https://www.deezer.com/album/{deezer_id}"
album_data = {
'title': album_title,
'deezer_url': deezer_url,
'deezer_id': deezer_id,
'release_date': album.get('releaseDate') or album.get('release_date') or album.get('release') or '',
'artist_name': artist_name,
'cover_url': album.get('imageUrl') or album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or ''
}
# Categorize albums based on status
if album_status == 'red' or not musicbrainz_id or musicbrainz_id == '':
# Red status or no MBID = needs to be added to MusicBrainz
albums_to_add.append(album_data)
print(f" ✓ Added to 'to add' list (not in MusicBrainz)")
elif album_status == 'orange':
# Orange status = in MusicBrainz but needs linking/updates
album_data['mbid'] = musicbrainz_id
album_data['mb_url'] = album.get('albumMBUrl', f'https://musicbrainz.org/release/{musicbrainz_id}')
album_data['album_issues'] = album_issues
albums_to_update.append(album_data)
print(f" ✓ Added to 'to update' list (needs linking/updates)")
else:
# Green status = properly linked, skip
print(f" ✓ Album is properly linked (MBID: {musicbrainz_id})")
print(f" [Sambl] ✓ Found {len(albums_to_add)} album(s) to add, {len(albums_to_update)} album(s) to update")
return albums_to_add, albums_to_update return albums_to_add, albums_to_update
except requests.exceptions.RequestException as e: except requests.exceptions.RequestException:
print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
return [], [] return [], []
except (KeyError, ValueError, TypeError) as e: except (KeyError, ValueError, TypeError):
print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
print(f" [Sambl] Response: {response.text[:200] if 'response' in locals() else 'N/A'}", file=sys.stderr)
return [], [] return [], []
@@ -280,69 +197,94 @@ class SubmissionLinkGenerator:
@staticmethod @staticmethod
def generate_atisket_link(deezer_url: str) -> str: def generate_atisket_link(deezer_url: str) -> str:
"""Generate an a-tisket submission link from a Deezer URL""" """Generate an a-tisket submission link from a Deezer URL"""
encoded_url = quote(deezer_url, safe='') return f"https://atisket.pulsewidth.org.uk/?url={quote(deezer_url, safe='')}"
return f"https://atisket.pulsewidth.org.uk/?url={encoded_url}"
@staticmethod @staticmethod
def generate_harmony_link(deezer_url: str) -> str: def generate_harmony_link(deezer_url: str) -> str:
"""Generate a Harmony submission link from a Deezer URL""" """Generate a Harmony submission link from a Deezer URL"""
encoded_url = quote(deezer_url, safe='') return f"https://harmony.pulsewidth.org.uk/?url={quote(deezer_url, safe='')}"
return f"https://harmony.pulsewidth.org.uk/?url={encoded_url}"
@staticmethod @staticmethod
def generate_links(deezer_url: str) -> Dict[str, str]: def generate_links(deezer_url: str) -> Dict[str, str]:
"""Generate both a-tisket and Harmony links""" """Generate both a-tisket and Harmony links"""
return { return {
'deezer_url': deezer_url, "deezer_url": deezer_url,
'atisket_link': SubmissionLinkGenerator.generate_atisket_link(deezer_url), "atisket_link": SubmissionLinkGenerator.generate_atisket_link(deezer_url),
'harmony_link': SubmissionLinkGenerator.generate_harmony_link(deezer_url) "harmony_link": SubmissionLinkGenerator.generate_harmony_link(deezer_url),
} }
def _process_albums(albums: List[Dict], action: str) -> List[Dict]:
"""Process albums and generate submission links"""
return list(
map(
lambda album: {
**album,
"submission_links": SubmissionLinkGenerator.generate_links(
album["deezer_url"]
),
"action": action,
},
filter(lambda album: album.get("deezer_url"), albums),
)
)
def _format_album_output(album: Dict) -> str:
"""Format album information for console output"""
lines = [f" 📀 {album.get('title', 'Unknown Title')}"]
lines.append(f" Deezer: {album.get('deezer_url')}")
if album.get("mb_url"):
lines.append(f" MusicBrainz: {album['mb_url']}")
if album.get("album_issues"):
lines.append(f" Issues: {', '.join(album['album_issues'])}")
links = album.get("submission_links", {})
lines.append(f" a-tisket: {links.get('atisket_link')}")
lines.append(f" Harmony: {links.get('harmony_link')}")
return "\n".join(lines)
def main(): def main():
"""Main execution function""" """Main execution function"""
# Configuration - loaded from .env file or environment variables
LIDARR_URL = os.getenv("LIDARR_URL") LIDARR_URL = os.getenv("LIDARR_URL")
LIDARR_API_KEY = os.getenv("LIDARR_API_KEY") LIDARR_API_KEY = os.getenv("LIDARR_API_KEY")
SAMBL_URL = os.getenv("SAMBL_URL") or None # Set if Sambl has a web API SAMBL_URL = os.getenv("SAMBL_URL") or None
MAX_ARTISTS = int(os.getenv("MAX_ARTISTS", "5")) # Limit number of artists to process (default: 5) MAX_ARTISTS = int(os.getenv("MAX_ARTISTS", "5"))
# Validate required configuration
if not LIDARR_URL: if not LIDARR_URL:
print("Error: LIDARR_URL not set. Please set it in .env file or environment variables.", file=sys.stderr) print("Error: LIDARR_URL not set.", file=sys.stderr)
sys.exit(1) sys.exit(1)
if not LIDARR_API_KEY: if not LIDARR_API_KEY:
print("Error: LIDARR_API_KEY not set. Please set it in .env file or environment variables.", file=sys.stderr) print("Error: LIDARR_API_KEY not set.", file=sys.stderr)
sys.exit(1) sys.exit(1)
# Initialize clients
lidarr = LidarrClient(LIDARR_URL, LIDARR_API_KEY) lidarr = LidarrClient(LIDARR_URL, LIDARR_API_KEY)
sambl = SamblClient(SAMBL_URL) sambl = SamblClient(SAMBL_URL)
print("Fetching monitored artists from Lidarr...") print("Fetching monitored artists from Lidarr...")
artists = lidarr.get_monitored_artists(['new', 'all']) artists = lidarr.get_monitored_artists(["new", "all"])
if not artists: if not artists:
print("No artists found with monitorNewItems set to 'new' or 'all'") print("No artists found with monitorNewItems set to 'new' or 'all'")
return return
total_artists = len(artists) total_artists = len(artists)
# Limit the number of artists for testing
if MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS: if MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS:
print(f"Found {total_artists} monitored artists (limiting to {MAX_ARTISTS} for testing)") print(
f"Found {total_artists} monitored artists (limiting to {MAX_ARTISTS} for testing)"
)
artists = artists[:MAX_ARTISTS] artists = artists[:MAX_ARTISTS]
else: else:
print(f"Found {total_artists} monitored artists") print(f"Found {total_artists} monitored artists")
print("\n" + "="*80) print("\n" + "=" * 80)
all_albums_to_add = [] all_albums_to_add = []
all_albums_to_update = [] all_albums_to_update = []
for artist in artists: for artist in artists:
artist_name = artist.get('artistName', 'Unknown') artist_name = artist.get("artistName", "Unknown")
artist_mbid = artist.get('foreignArtistId') or artist.get('mbid') artist_mbid = artist.get("foreignArtistId") or artist.get("mbid")
if not artist_mbid: if not artist_mbid:
print(f"\n⚠️ Skipping {artist_name} - no MusicBrainz ID found") print(f"\n⚠️ Skipping {artist_name} - no MusicBrainz ID found")
@@ -351,77 +293,53 @@ def main():
print(f"\n🎵 Artist: {artist_name}") print(f"\n🎵 Artist: {artist_name}")
print(f" MusicBrainz ID: {artist_mbid}") print(f" MusicBrainz ID: {artist_mbid}")
# Find albums using Sambl albums_to_add, albums_to_update = sambl.find_missing_albums(
albums_to_add, albums_to_update = sambl.find_missing_albums(artist_mbid, artist_name) artist_mbid, artist_name
)
# Process albums to add
if albums_to_add: if albums_to_add:
print(f"\n 📥 Albums to ADD ({len(albums_to_add)}):") print(f"\n 📥 Albums to ADD ({len(albums_to_add)}):")
for album in albums_to_add: processed = _process_albums(albums_to_add, "add")
deezer_url = album.get('deezer_url') all_albums_to_add.extend(processed)
if deezer_url: print("\n".join(map(_format_album_output, processed)))
links = SubmissionLinkGenerator.generate_links(deezer_url)
album['submission_links'] = links
album['action'] = 'add'
all_albums_to_add.append(album)
print(f" 📀 {album.get('title', 'Unknown Title')}")
print(f" Deezer: {deezer_url}")
print(f" a-tisket: {links['atisket_link']}")
print(f" Harmony: {links['harmony_link']}")
# Process albums to update
if albums_to_update: if albums_to_update:
print(f"\n 🔄 Albums to UPDATE ({len(albums_to_update)}):") print(f"\n 🔄 Albums to UPDATE ({len(albums_to_update)}):")
for album in albums_to_update: processed = _process_albums(albums_to_update, "update")
deezer_url = album.get('deezer_url') all_albums_to_update.extend(processed)
mb_url = album.get('mb_url', '') print("\n".join(map(_format_album_output, processed)))
issues = album.get('album_issues', [])
if deezer_url:
links = SubmissionLinkGenerator.generate_links(deezer_url)
album['submission_links'] = links
album['action'] = 'update'
all_albums_to_update.append(album)
print(f" 📀 {album.get('title', 'Unknown Title')}")
print(f" Deezer: {deezer_url}")
if mb_url:
print(f" MusicBrainz: {mb_url}")
if issues:
print(f" Issues: {', '.join(issues)}")
print(f" a-tisket: {links['atisket_link']}")
print(f" Harmony: {links['harmony_link']}")
if not albums_to_add and not albums_to_update: if not albums_to_add and not albums_to_update:
print(f" ✓ All albums are properly linked!") print(f" ✓ All albums are properly linked!")
# Generate summary report print("\n" + "=" * 80)
print("\n" + "="*80)
print(f"\n📊 Summary:") print(f"\n📊 Summary:")
print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS else "")) artists_info = f" Artists processed: {len(artists)}"
if MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS:
artists_info += f" (of {total_artists} total)"
print(artists_info)
print(f" Albums to ADD: {len(all_albums_to_add)}") print(f" Albums to ADD: {len(all_albums_to_add)}")
print(f" Albums to UPDATE: {len(all_albums_to_update)}") print(f" Albums to UPDATE: {len(all_albums_to_update)}")
# Save results to JSON file
all_albums = all_albums_to_add + all_albums_to_update all_albums = all_albums_to_add + all_albums_to_update
if all_albums: if not all_albums:
output_file = "missing_albums.json"
with open(output_file, 'w', encoding='utf-8') as f:
json.dump({
'albums_to_add': all_albums_to_add,
'albums_to_update': all_albums_to_update,
'summary': {
'total_to_add': len(all_albums_to_add),
'total_to_update': len(all_albums_to_update),
'total': len(all_albums)
}
}, f, indent=2, ensure_ascii=False)
print(f"\n💾 Results saved to {output_file}")
# Generate HTML report with clickable links
generate_html_report(all_albums_to_add, all_albums_to_update)
else:
print("\n✨ All albums are already on MusicBrainz!") print("\n✨ All albums are already on MusicBrainz!")
return
output_data = {
"albums_to_add": all_albums_to_add,
"albums_to_update": all_albums_to_update,
"summary": {
"total_to_add": len(all_albums_to_add),
"total_to_update": len(all_albums_to_update),
"total": len(all_albums),
},
}
with open("missing_albums.json", "w", encoding="utf-8") as f:
json.dump(output_data, f, indent=2, ensure_ascii=False)
print(f"\n💾 Results saved to missing_albums.json")
generate_html_report(all_albums_to_add, all_albums_to_update)
def generate_html_report(albums_to_add: List[Dict], albums_to_update: List[Dict]): def generate_html_report(albums_to_add: List[Dict], albums_to_update: List[Dict]):
@@ -545,57 +463,58 @@ def generate_html_report(albums_to_add: List[Dict], albums_to_update: List[Dict]
</div> </div>
""" """
def format_album(album, is_update=False): def format_album(album: Dict, is_update: bool = False) -> str:
submission_links = album.get('submission_links', {}) submission_links = album.get("submission_links", {})
mb_info = "" mb_info = ""
issues_info = "" issues_info = ""
if is_update: if is_update:
mb_url = album.get('mb_url', '') mb_url = album.get("mb_url", "")
if mb_url: if mb_url:
mb_info = f'<div class="mb-link"><a href="{mb_url}" target="_blank">View on MusicBrainz</a></div>' mb_info = f'<div class="mb-link"><a href="{mb_url}" target="_blank">View on MusicBrainz</a></div>'
issues = album.get('album_issues', []) issues = album.get("album_issues", [])
if issues: if issues:
issues_info = f'<div class="issues">Issues: {", ".join(issues)}</div>' issues_info = f'<div class="issues">Issues: {", ".join(issues)}</div>'
title = album.get("title", "Unknown Title")
artist = album.get("artist_name", "Unknown Artist")
atisket_link = submission_links.get("atisket_link", "#")
harmony_link = submission_links.get("harmony_link", "#")
deezer_url = submission_links.get("deezer_url", "#")
return album_html.format( return album_html.format(
title=album.get('title', 'Unknown Title'), title=title,
artist=album.get('artist_name', 'Unknown Artist'), artist=artist,
mb_info=mb_info, mb_info=mb_info,
issues_info=issues_info, issues_info=issues_info,
atisket_link=submission_links.get('atisket_link', '#'), atisket_link=atisket_link,
harmony_link=submission_links.get('harmony_link', '#'), harmony_link=harmony_link,
deezer_url=submission_links.get('deezer_url', '#') deezer_url=deezer_url,
) )
albums_html = "" albums_html = ""
# Albums to ADD section
if albums_to_add: if albums_to_add:
albums_html += '<h2>📥 Albums to ADD (Not in MusicBrainz)</h2>' albums_html += "<h2>📥 Albums to ADD (Not in MusicBrainz)</h2>"
for album in albums_to_add: formatted_add = map(lambda album: format_album(album, False), albums_to_add)
albums_html += format_album(album, is_update=False) albums_html += "".join(formatted_add)
# Albums to UPDATE section
if albums_to_update: if albums_to_update:
albums_html += '<h2>🔄 Albums to UPDATE (Need Linking/Updates)</h2>' albums_html += "<h2>🔄 Albums to UPDATE (Need Linking/Updates)</h2>"
for album in albums_to_update: formatted_update = map(
albums_html += format_album(album, is_update=True) lambda album: format_album(album, True), albums_to_update
)
albums_html += "".join(formatted_update)
html_content = html_content.format( add_count = len(albums_to_add)
add_count=len(albums_to_add), update_count = len(albums_to_update)
update_count=len(albums_to_update) html_header = html_content.format(add_count=add_count, update_count=update_count)
) + albums_html + """ html_footer = "\n</body>\n</html>\n"
</body> html_content = html_header + albums_html + html_footer
</html>
"""
output_file = "missing_albums.html" with open("missing_albums.html", "w", encoding="utf-8") as f:
with open(output_file, 'w', encoding='utf-8') as f:
f.write(html_content) f.write(html_content)
print(f"📄 HTML report saved to {output_file}") print(f"📄 HTML report saved to missing_albums.html")
if __name__ == "__main__": if __name__ == "__main__":
main() main()