#!/usr/bin/env python3
"""
Script to identify missing albums on MusicBrainz from Deezer releases
for artists monitored in Lidarr, and generate submission links.

This script:
1. Fetches artists from Lidarr with monitorNewItems set to "new" or "all"
2. Uses SAMBL to find albums missing on MusicBrainz from Deezer releases
3. Generates a-tisket/harmony links for submitting albums to MusicBrainz
"""

import html
import json
import os
import sys
from typing import Dict, List, Optional
from urllib.parse import quote

import requests
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()


class LidarrClient:
    """Client for interacting with Lidarr API"""

    def __init__(self, base_url: str, api_key: str, timeout: float = 30):
        """
        Args:
            base_url: Root URL of the Lidarr instance (a trailing slash is stripped)
            api_key: Lidarr API key, sent in the X-Api-Key header
            timeout: Seconds to wait for Lidarr before giving up on a request
        """
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.headers = {'X-Api-Key': api_key}
        self.timeout = timeout

    def get_artists(self) -> List[Dict]:
        """
        Fetch all artists from Lidarr

        Returns:
            The decoded JSON list from /api/v1/artist, or an empty list when the
            request fails or the response is not a JSON list.
        """
        url = f"{self.base_url}/api/v1/artist"
        try:
            # A timeout keeps the script from hanging forever on an unreachable server
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            artists = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr)
            return []

        if not isinstance(artists, list):
            print("Error fetching artists from Lidarr: unexpected response format", file=sys.stderr)
            return []
        return artists

    def get_monitored_artists(self, monitor_types: Optional[List[str]] = None) -> List[Dict]:
        """
        Get artists with monitorNewItems set to specified values

        Args:
            monitor_types: List of monitorNewItems values to filter by
                (default: ['new', 'all'])

        Returns:
            List of artist dictionaries matching the criteria
        """
        if monitor_types is None:
            monitor_types = ['new', 'all']

        return [
            artist for artist in self.get_artists()
            if artist.get('monitorNewItems') in monitor_types
        ]


class SamblClient:
    """
    Client for interacting with SAMBL API to find missing albums.

    SAMBL (Streaming Artist MusicBrainz Lookup) is available at:
    - Website: https://sambl.lioncat6.com
    - GitHub: https://github.com/Lioncat6/SAMBL-React
    - API Root: https://sambl.lioncat6.com/api/
    """

    # Substrings of a status value that mark an album as not linked to MusicBrainz
    # ('orange' is the status SAMBL uses for albums that are not linked)
    _MISSING_MARKERS = ('missing', 'not_found', 'not_linked', 'orange')
    # Keys under which an album entry may carry its MusicBrainz ID
    _MBID_KEYS = ('musicbrainz_id', 'mbid')

    def __init__(self, base_url: Optional[str] = None):
        # Default to the public SAMBL instance if no URL provided
        self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip('/')

    def _search_deezer_artist(self, artist_name: str) -> Optional[str]:
        """
        Search for an artist on Deezer and return their Deezer ID.
        Uses Deezer API directly since SAMBL's searchArtists endpoint is unsupported.

        Args:
            artist_name: Name of the artist to search for

        Returns:
            Deezer artist ID as string, or None if not found
        """
        try:
            # Use Deezer API to search for the artist
            deezer_search_url = "https://api.deezer.com/search/artist"
            params = {'q': artist_name, 'limit': 1}
            response = requests.get(deezer_search_url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
            return None

        results = data.get('data') if isinstance(data, dict) else None
        first = results[0] if isinstance(results, list) and results else None
        # Only the first (best) match is used; it must actually carry an id
        if isinstance(first, dict) and first.get('id') is not None:
            artist_id = str(first['id'])
            print(f" [Sambl] Found Deezer artist ID: {artist_id}")
            return artist_id

        print(f" [Sambl] ⚠️ Artist '{artist_name}' not found on Deezer")
        return None

    @staticmethod
    def _extract_albums(data) -> List[Dict]:
        """Pull the album list out of a SAMBL response (bare list, 'albums' or 'data' key)."""
        if isinstance(data, list):
            candidates = data
        elif isinstance(data, dict):
            candidates = data.get('albums') or []
            if not candidates and isinstance(data.get('data'), list):
                candidates = data['data']
        else:
            candidates = []

        if not isinstance(candidates, list):
            return []
        # Entries that are not objects cannot be inspected, so drop them
        return [album for album in candidates if isinstance(album, dict)]

    @classmethod
    def _is_missing(cls, album: Dict) -> bool:
        """
        Decide whether an album entry is missing from MusicBrainz.

        An album counts as missing when one of its status fields contains a known
        "not linked" marker, or when it carries no MusicBrainz ID under any of the
        known keys. The field names are assumptions about the SAMBL response.
        """
        status = str(album.get('status', '')).lower()
        mb_status = str(album.get('musicbrainz_status', '')).lower()
        if any(marker in status or marker in mb_status for marker in cls._MISSING_MARKERS):
            return True
        # An ID under either key means the album is linked; requiring both keys
        # would flag every album that uses only one of them.
        return not any(album.get(key) for key in cls._MBID_KEYS)

    @staticmethod
    def _to_missing_record(album: Dict, artist_name: str) -> Optional[Dict]:
        """Build the output record for a missing album, or None if it has no Deezer ID."""
        deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
        if not deezer_id:
            return None
        return {
            'title': album.get('title') or album.get('name') or 'Unknown Title',
            'deezer_url': f"https://www.deezer.com/album/{deezer_id}",
            'deezer_id': deezer_id,
            'release_date': (album.get('release_date') or album.get('releaseDate')
                             or album.get('release') or ''),
            'artist_name': artist_name,
            'cover_url': (album.get('cover') or album.get('cover_medium')
                          or album.get('coverUrl') or ''),
        }

    def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]:
        """
        Find albums missing on MusicBrainz from Deezer releases for an artist.

        Uses SAMBL's /api/compareArtistAlbums endpoint which compares albums
        from Deezer with MusicBrainz and identifies missing ones.

        Args:
            artist_mbid: MusicBrainz ID of the artist
            artist_name: Name of the artist

        Returns:
            List of album dictionaries with Deezer URLs and metadata
            Format: [
                {
                    'title': 'Album Title',
                    'deezer_url': 'https://www.deezer.com/album/123456789',
                    'deezer_id': '123456789',
                    'release_date': '2024-01-01',
                    'artist_name': artist_name,
                    'cover_url': 'https://...'
                }
            ]
            An empty list is returned when the artist is not found on Deezer or
            when the SAMBL request or response parsing fails.
        """
        print(f" [Sambl] Checking for missing albums for {artist_name} (MBID: {artist_mbid})")

        # First, we need to find the Deezer artist ID
        deezer_artist_id = self._search_deezer_artist(artist_name)
        if not deezer_artist_id:
            return []

        # Now use SAMBL's compareArtistAlbums endpoint
        api_url = f"{self.base_url}/api/compareArtistAlbums"
        params = {
            'provider_id': deezer_artist_id,
            'provider': 'deezer',
            'mbid': artist_mbid,
            'full': 'true'  # Get full information including missing albums
        }
        try:
            response = requests.get(api_url, params=params, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
            return []

        try:
            # The response structure may vary, so several shapes are accepted
            albums = self._extract_albums(response.json())
            records = (
                self._to_missing_record(album, artist_name)
                for album in albums if self._is_missing(album)
            )
            missing_albums = [record for record in records if record is not None]
        except (KeyError, ValueError, TypeError, AttributeError) as e:
            print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
            print(f" [Sambl] Response: {response.text[:200]}", file=sys.stderr)
            return []

        if missing_albums:
            print(f" [Sambl] ✓ Found {len(missing_albums)} missing album(s)")
        else:
            print(" [Sambl] ✓ No missing albums found")
        return missing_albums


class SubmissionLinkGenerator:
    """Generate submission links for MusicBrainz using a-tisket and Harmony"""

    @staticmethod
    def generate_atisket_link(deezer_url: str) -> str:
        """Generate an a-tisket submission link from a Deezer URL"""
        encoded_url = quote(deezer_url, safe='')
        return f"https://atisket.pulsewidth.org.uk/?url={encoded_url}"

    @staticmethod
    def generate_harmony_link(deezer_url: str) -> str:
        """Generate a Harmony submission link from a Deezer URL"""
        encoded_url = quote(deezer_url, safe='')
        # NOTE(review): Harmony's lookup page may live at /release?url=... rather
        # than the site root - confirm against the Harmony documentation.
        return f"https://harmony.pulsewidth.org.uk/?url={encoded_url}"

    @staticmethod
    def generate_links(deezer_url: str) -> Dict[str, str]:
        """Generate both a-tisket and Harmony links"""
        return {
            'deezer_url': deezer_url,
            'atisket_link': SubmissionLinkGenerator.generate_atisket_link(deezer_url),
            'harmony_link': SubmissionLinkGenerator.generate_harmony_link(deezer_url)
        }


def main():
    """Main execution function"""
    # Configuration - loaded from .env file or environment variables
    lidarr_url = os.getenv("LIDARR_URL")
    lidarr_api_key = os.getenv("LIDARR_API_KEY")
    sambl_url = os.getenv("SAMBL_URL") or None  # Set if Sambl has a web API
    raw_max_artists = os.getenv("MAX_ARTISTS", "5")

    # Validate required configuration
    if not lidarr_url:
        print("Error: LIDARR_URL not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)
    if not lidarr_api_key:
        print("Error: LIDARR_API_KEY not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)
    try:
        # Limit number of artists to process (default: 5); 0 or less disables the limit
        max_artists = int(raw_max_artists)
    except ValueError:
        print(f"Error: MAX_ARTISTS must be an integer, got {raw_max_artists!r}.", file=sys.stderr)
        sys.exit(1)

    # Initialize clients
    lidarr = LidarrClient(lidarr_url, lidarr_api_key)
    sambl = SamblClient(sambl_url)

    print("Fetching monitored artists from Lidarr...")
    artists = lidarr.get_monitored_artists(['new', 'all'])

    if not artists:
        print("No artists found with monitorNewItems set to 'new' or 'all'")
        return

    total_artists = len(artists)
    limited = max_artists > 0 and total_artists > max_artists

    # Limit the number of artists for testing
    if limited:
        print(f"Found {total_artists} monitored artists (limiting to {max_artists} for testing)")
        artists = artists[:max_artists]
    else:
        print(f"Found {total_artists} monitored artists")

    print("\n" + "="*80)

    all_missing_albums = []

    for artist in artists:
        artist_name = artist.get('artistName', 'Unknown')
        artist_mbid = artist.get('foreignArtistId') or artist.get('mbid')

        if not artist_mbid:
            print(f"\n⚠️ Skipping {artist_name} - no MusicBrainz ID found")
            continue

        print(f"\n🎵 Artist: {artist_name}")
        print(f" MusicBrainz ID: {artist_mbid}")

        # Find missing albums using Sambl
        missing_albums = sambl.find_missing_albums(artist_mbid, artist_name)

        if not missing_albums:
            print(" ✓ No missing albums found")
            continue

        print(f" Found {len(missing_albums)} missing album(s):")
        for album in missing_albums:
            deezer_url = album.get('deezer_url')
            if not deezer_url:
                continue
            links = SubmissionLinkGenerator.generate_links(deezer_url)
            album['submission_links'] = links
            all_missing_albums.append(album)

            print(f" 📀 {album.get('title', 'Unknown Title')}")
            print(f" Deezer: {deezer_url}")
            print(f" a-tisket: {links['atisket_link']}")
            print(f" Harmony: {links['harmony_link']}")

    # Generate summary report
    print("\n" + "="*80)
    print("\n📊 Summary:")
    print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if limited else ""))
    print(f" Total missing albums found: {len(all_missing_albums)}")

    # Save results to JSON file
    if all_missing_albums:
        output_file = "missing_albums.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_missing_albums, f, indent=2, ensure_ascii=False)
        print(f"\n💾 Results saved to {output_file}")

        # Generate HTML report with clickable links
        generate_html_report(all_missing_albums)
    else:
        print("\n✨ All albums are already on MusicBrainz!")


def generate_html_report(albums: List[Dict]) -> None:
    """
    Generate an HTML report with clickable submission links

    Writes missing_albums.html in the current working directory. Titles, artist
    names and link targets come from external APIs, so every value is
    HTML-escaped before being placed in the page.

    Args:
        albums: Album dictionaries as collected by main(); each may carry a
            'submission_links' dict with 'atisket_link', 'harmony_link' and
            'deezer_url' entries ('#' is used for any that are absent).
    """
    page_template = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Missing Albums - MusicBrainz Submission Links</title>
</head>
<body>
<h1>🎵 Missing Albums - MusicBrainz Submission Links</h1>
<p>Total missing albums: {count}</p>
{albums}</body>
</html>
"""

    album_template = """<div class="album">
<h2>{title}</h2>
<p>by {artist}</p>
<p>
<a href="{atisket_link}">a-tisket</a> |
<a href="{harmony_link}">Harmony</a> |
<a href="{deezer_url}">Deezer</a>
</p>
</div>
"""

    def esc(value) -> str:
        # quote=True also escapes quotes so values are safe inside href="..."
        return html.escape(str(value), quote=True)

    album_sections = []
    for album in albums:
        submission_links = album.get('submission_links') or {}
        album_sections.append(album_template.format(
            title=esc(album.get('title', 'Unknown Title')),
            artist=esc(album.get('artist_name', 'Unknown Artist')),
            atisket_link=esc(submission_links.get('atisket_link', '#')),
            harmony_link=esc(submission_links.get('harmony_link', '#')),
            deezer_url=esc(submission_links.get('deezer_url', '#')),
        ))

    html_content = page_template.format(count=len(albums), albums="".join(album_sections))

    output_file = "missing_albums.html"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"📄 HTML report saved to {output_file}")


if __name__ == "__main__":
    main()