From 51df3f15db44d9903ea55b13c7e81a28ff7c2c3c Mon Sep 17 00:00:00 2001 From: Danilo Reyes Date: Tue, 11 Nov 2025 09:35:54 -0600 Subject: [PATCH] Add initial project files for MusicBrainz Missing Albums Finder - Created .gitignore to exclude unnecessary files and directories. - Added flake.nix for Nix package management and development environment setup. - Introduced flake.lock to lock dependencies for reproducibility. - Implemented main.py script to identify missing albums on MusicBrainz from Deezer releases for artists monitored in Lidarr, including functionality for generating submission links. --- .gitignore | 28 ++++ flake.lock | 61 +++++++ flake.nix | 37 +++++ main.py | 459 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 585 insertions(+) create mode 100644 .gitignore create mode 100644 flake.lock create mode 100644 flake.nix create mode 100755 main.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bfb027c --- /dev/null +++ b/.gitignore @@ -0,0 +1,28 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +env/ +venv/ +ENV/ +.venv + +# Environment variables +# Note: .env is NOT ignored by default so you can commit a template +# If you want to ignore your actual .env, uncomment the line below +# .env + +# Output files +missing_albums.json +missing_albums.html + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.env + diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..07d5f28 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1762596750, + "narHash": 
"sha256-rXXuz51Bq7DHBlfIjN7jO8Bu3du5TV+3DSADBX7/9YQ=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "b6a8526db03f735b89dd5ff348f53f752e7ddc8e", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..c842f5c --- /dev/null +++ b/flake.nix @@ -0,0 +1,37 @@ +{ + description = "Lidarr to MusicBrainz Missing Albums Finder"; + + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { inherit system; }; + pythonEnv = pkgs.python3.withPackages (ps: with ps; [ + requests + python-dotenv + ]); + in + { + devShells.default = pkgs.mkShell { + buildInputs = [ pythonEnv ]; + shellHook = '' + echo "Python environment ready!" + echo "Run: python main.py" + ''; + }; + + packages.default = pkgs.writeShellApplication { + name = "lidarr-musicbrainz"; + runtimeInputs = [ pythonEnv ]; + text = '' + python ${./main.py} "$@" + ''; + }; + } + ); +} + diff --git a/main.py b/main.py new file mode 100755 index 0000000..64f67ad --- /dev/null +++ b/main.py @@ -0,0 +1,459 @@ +#!/usr/bin/env python3 +""" +Script to identify missing albums on MusicBrainz from Deezer releases +for artists monitored in Lidarr, and generate submission links. + +This script: +1. 
class LidarrClient:
    """Minimal client for the Lidarr HTTP API.

    Args:
        base_url: Root URL of the Lidarr instance; a trailing slash is stripped.
        api_key: Lidarr API key, sent in the ``X-Api-Key`` header.
        timeout: Seconds to wait for Lidarr before giving up on a request.
    """

    def __init__(self, base_url: str, api_key: str, timeout: float = 30):
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.headers = {'X-Api-Key': api_key}
        self.timeout = timeout

    def get_artists(self) -> List[Dict]:
        """Fetch all artists from Lidarr.

        Returns:
            The decoded JSON list from ``/api/v1/artist``, or an empty list
            when the request fails or the payload is not a list (the error is
            reported on stderr).
        """
        url = f"{self.base_url}/api/v1/artist"
        try:
            # Without a timeout a stalled server would block the script forever.
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose
            # JSON decode error is not a RequestException.
            print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr)
            return []

        if not isinstance(payload, list):
            print(
                "Error fetching artists from Lidarr: "
                f"unexpected response type {type(payload).__name__}",
                file=sys.stderr,
            )
            return []
        return payload

    def get_monitored_artists(self, monitor_types: Optional[List[str]] = None) -> List[Dict]:
        """
        Get artists with monitorNewItems set to specified values

        Args:
            monitor_types: List of monitorNewItems values to filter by (default: ['new', 'all'])

        Returns:
            List of artist dictionaries matching the criteria
        """
        if monitor_types is None:
            monitor_types = ['new', 'all']

        return [
            artist for artist in self.get_artists()
            if isinstance(artist, dict) and artist.get('monitorNewItems') in monitor_types
        ]
class SamblClient:
    """
    Client for interacting with SAMBL API to find missing albums.

    SAMBL (Streaming Artist MusicBrainz Lookup) is available at:
    - Website: https://sambl.lioncat6.com
    - GitHub: https://github.com/Lioncat6/SAMBL-React
    - API Root: https://sambl.lioncat6.com/api/
    """

    # Substrings of an album's ``status`` value that mark it as not on MusicBrainz.
    _MISSING_STATUS_MARKERS = ('missing', 'not_found', 'not_linked', 'orange')

    def __init__(self, base_url: Optional[str] = None):
        # Default to the public SAMBL instance if no URL provided
        self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip('/')

    def _search_deezer_artist(self, artist_name: str) -> Optional[str]:
        """
        Search for an artist on Deezer and return their Deezer ID.
        Uses Deezer API directly since SAMBL's searchArtists endpoint is unsupported.

        Args:
            artist_name: Name of the artist to search for

        Returns:
            Deezer artist ID as string, or None if not found
        """
        try:
            response = requests.get(
                "https://api.deezer.com/search/artist",
                params={'q': artist_name, 'limit': 1},
                timeout=10,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
            return None

        results = data.get('data') if isinstance(data, dict) else None
        first = results[0] if isinstance(results, list) and results else None
        # A hit without an 'id' is treated as "not found" instead of raising KeyError.
        if isinstance(first, dict) and first.get('id') is not None:
            artist_id = str(first['id'])
            print(f" [Sambl] Found Deezer artist ID: {artist_id}")
            return artist_id

        print(f" [Sambl] ⚠️ Artist '{artist_name}' not found on Deezer")
        return None

    @staticmethod
    def _extract_missing_albums(data, artist_name: str) -> List[Dict]:
        """Pick the albums that look absent from MusicBrainz out of a decoded response.

        Args:
            data: Decoded JSON; either a list of albums or a dict holding the
                list under ``albums`` or ``data``.
            artist_name: Copied into every returned entry.

        Returns:
            One dict per missing album with the keys ``title``, ``deezer_url``,
            ``deezer_id``, ``release_date``, ``artist_name`` and ``cover_url``.
        """
        albums = []
        if isinstance(data, dict):
            albums = data.get('albums') or []
            if not albums and isinstance(data.get('data'), list):
                albums = data['data']
        elif isinstance(data, list):
            albums = data
        if not isinstance(albums, list):
            return []

        missing_albums = []
        for album in albums:
            if not isinstance(album, dict):
                continue  # ignore malformed entries rather than abort the whole artist

            status = str(album.get('status', '')).lower()
            flagged = any(marker in status for marker in SamblClient._MISSING_STATUS_MARKERS)
            # The MusicBrainz ID may live under either key; the album is only
            # unlinked when BOTH are absent or empty. Testing each key on its
            # own would flag every album that carries just one of them.
            linked = bool(album.get('musicbrainz_id') or album.get('mbid'))
            if not (flagged or not linked):
                continue

            deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
            if not deezer_id or deezer_id == 'None':
                continue

            missing_albums.append({
                'title': album.get('title') or album.get('name') or 'Unknown Title',
                'deezer_url': f"https://www.deezer.com/album/{deezer_id}",
                'deezer_id': deezer_id,
                'release_date': album.get('release_date') or album.get('releaseDate') or album.get('release') or '',
                'artist_name': artist_name,
                'cover_url': album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or '',
            })
        return missing_albums

    def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]:
        """
        Find albums missing on MusicBrainz from Deezer releases for an artist.

        Looks up the artist's Deezer ID by name, then queries SAMBL's
        /api/compareArtistAlbums endpoint and filters its album list.

        Args:
            artist_mbid: MusicBrainz ID of the artist
            artist_name: Name of the artist

        Returns:
            List of album dictionaries with Deezer URLs and metadata; empty
            when the artist is not found on Deezer or any request fails.
            Format:
            [
                {
                    'title': 'Album Title',
                    'deezer_url': 'https://www.deezer.com/album/123456789',
                    'deezer_id': '123456789',
                    'release_date': '2024-01-01',
                    'artist_name': artist_name,
                    'cover_url': ''
                }
            ]
        """
        print(f" [Sambl] Checking for missing albums for {artist_name} (MBID: {artist_mbid})")

        deezer_artist_id = self._search_deezer_artist(artist_name)
        if not deezer_artist_id:
            return []

        response = None
        try:
            response = requests.get(
                f"{self.base_url}/api/compareArtistAlbums",
                params={
                    'provider_id': deezer_artist_id,
                    'provider': 'deezer',
                    'mbid': artist_mbid,
                    'full': 'true',  # Get full information including missing albums
                },
                timeout=30,
            )
            response.raise_for_status()
            missing_albums = self._extract_missing_albums(response.json(), artist_name)
        except requests.exceptions.RequestException as e:
            print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
            return []
        except (KeyError, ValueError, TypeError, AttributeError) as e:
            print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
            snippet = response.text[:200] if response is not None else 'N/A'
            print(f" [Sambl] Response: {snippet}", file=sys.stderr)
            return []

        if missing_albums:
            print(f" [Sambl] ✓ Found {len(missing_albums)} missing album(s)")
        else:
            print(f" [Sambl] ✓ No missing albums found")
        return missing_albums
class SubmissionLinkGenerator:
    """Build MusicBrainz submission links (a-tisket and Harmony) for a Deezer URL."""

    # NOTE(review): both links pass the Deezer URL as a `url` query parameter
    # on the service root page - confirm each service accepts that form.

    @staticmethod
    def generate_atisket_link(deezer_url: str) -> str:
        """Return the a-tisket link carrying the fully percent-encoded Deezer URL."""
        return "https://atisket.pulsewidth.org.uk/?url=" + quote(deezer_url, safe='')

    @staticmethod
    def generate_harmony_link(deezer_url: str) -> str:
        """Return the Harmony link carrying the fully percent-encoded Deezer URL."""
        return "https://harmony.pulsewidth.org.uk/?url=" + quote(deezer_url, safe='')

    @staticmethod
    def generate_links(deezer_url: str) -> Dict[str, str]:
        """Return the original Deezer URL together with both submission links."""
        links = {'deezer_url': deezer_url}
        links['atisket_link'] = SubmissionLinkGenerator.generate_atisket_link(deezer_url)
        links['harmony_link'] = SubmissionLinkGenerator.generate_harmony_link(deezer_url)
        return links
def main():
    """Find albums missing from MusicBrainz for Lidarr-monitored artists.

    Configuration is read from environment variables (which may come from a
    .env file):

    - LIDARR_URL, LIDARR_API_KEY: required; the process exits with status 1
      when either is unset.
    - SAMBL_URL: optional SAMBL base URL.
    - MAX_ARTISTS: optional cap on artists processed (default 5; a value of 0
      or less disables the cap). A non-integer value exits with status 1.

    Prints progress to stdout. When any missing album is found, writes
    ``missing_albums.json`` and calls ``generate_html_report``.
    """
    lidarr_url = os.getenv("LIDARR_URL")
    lidarr_api_key = os.getenv("LIDARR_API_KEY")
    sambl_url = os.getenv("SAMBL_URL") or None  # Set if Sambl has a web API

    # Validate required configuration
    if not lidarr_url:
        print("Error: LIDARR_URL not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    if not lidarr_api_key:
        print("Error: LIDARR_API_KEY not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    # An empty MAX_ARTISTS (e.g. a blank line in .env) falls back to the
    # default; anything non-numeric is a clear configuration error rather
    # than an unhandled ValueError traceback.
    raw_max_artists = (os.getenv("MAX_ARTISTS") or "").strip() or "5"
    try:
        max_artists = int(raw_max_artists)
    except ValueError:
        print(f"Error: MAX_ARTISTS must be an integer, got {raw_max_artists!r}.", file=sys.stderr)
        sys.exit(1)

    # Initialize clients
    lidarr = LidarrClient(lidarr_url, lidarr_api_key)
    sambl = SamblClient(sambl_url)

    print("Fetching monitored artists from Lidarr...")
    artists = lidarr.get_monitored_artists(['new', 'all'])

    if not artists:
        print("No artists found with monitorNewItems set to 'new' or 'all'")
        return

    total_artists = len(artists)
    limited = max_artists > 0 and total_artists > max_artists

    # Limit the number of artists for testing
    if limited:
        print(f"Found {total_artists} monitored artists (limiting to {max_artists} for testing)")
        artists = artists[:max_artists]
    else:
        print(f"Found {total_artists} monitored artists")
    print("\n" + "="*80)

    all_missing_albums = []

    for artist in artists:
        artist_name = artist.get('artistName', 'Unknown')
        artist_mbid = artist.get('foreignArtistId') or artist.get('mbid')

        if not artist_mbid:
            print(f"\n⚠️ Skipping {artist_name} - no MusicBrainz ID found")
            continue

        print(f"\n🎵 Artist: {artist_name}")
        print(f" MusicBrainz ID: {artist_mbid}")

        # Find missing albums using Sambl
        missing_albums = sambl.find_missing_albums(artist_mbid, artist_name)

        if not missing_albums:
            print(f" ✓ No missing albums found")
            continue

        print(f" Found {len(missing_albums)} missing album(s):")
        for album in missing_albums:
            deezer_url = album.get('deezer_url')
            if not deezer_url:
                continue
            links = SubmissionLinkGenerator.generate_links(deezer_url)
            album['submission_links'] = links
            all_missing_albums.append(album)

            print(f" 📀 {album.get('title', 'Unknown Title')}")
            print(f" Deezer: {deezer_url}")
            print(f" a-tisket: {links['atisket_link']}")
            print(f" Harmony: {links['harmony_link']}")

    # Generate summary report
    print("\n" + "="*80)
    print(f"\n📊 Summary:")
    print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if limited else ""))
    print(f" Total missing albums found: {len(all_missing_albums)}")

    # Save results to JSON file
    if all_missing_albums:
        output_file = "missing_albums.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_missing_albums, f, indent=2, ensure_ascii=False)
        print(f"\n💾 Results saved to {output_file}")

        # Generate HTML report with clickable links
        generate_html_report(all_missing_albums)
    else:
        print("\n✨ All albums are already on MusicBrainz!")
def generate_html_report(albums: List[Dict], output_file: str = "missing_albums.html"):
    """Generate an HTML report with clickable submission links.

    Args:
        albums: Album dicts. Each is read for ``title``, ``artist_name`` and a
            ``submission_links`` dict with ``atisket_link``, ``harmony_link``
            and ``deezer_url``.
        output_file: Path of the HTML file to write (UTF-8).

    Album titles and artist names originate from external APIs, so every
    interpolated value is HTML-escaped before it is written.
    """
    from html import escape  # local import: only this function needs it

    def safe(value) -> str:
        # quote=True also escapes quotes, so the result is valid inside href="...".
        return escape(str(value), quote=True)

    page_head = (
        "<!DOCTYPE html>\n"
        "<html lang=\"en\">\n"
        "<head>\n"
        "    <meta charset=\"UTF-8\">\n"
        "    <title>Missing Albums - MusicBrainz Submission Links</title>\n"
        "    <style>\n"
        "        body { font-family: sans-serif; margin: 2em; }\n"
        "        .album { border: 1px solid #ccc; border-radius: 6px; padding: 1em; margin: 1em 0; }\n"
        "        .album-title { font-weight: bold; }\n"
        "        .links a { margin-right: 1em; }\n"
        "    </style>\n"
        "</head>\n"
        "<body>\n"
        "    <h1>🎵 Missing Albums - MusicBrainz Submission Links</h1>\n"
    )
    summary = (
        "    <div class=\"summary\">\n"
        f"        <strong>Total missing albums:</strong> {len(albums)}\n"
        "    </div>\n"
    )

    cards = []
    for album in albums:
        submission_links = album.get('submission_links') or {}
        title = album.get('title') or 'Unknown Title'
        artist = album.get('artist_name') or 'Unknown Artist'
        cards.append(
            "    <div class=\"album\">\n"
            f"        <div class=\"album-title\">{safe(title)}</div>\n"
            f"        <div class=\"artist\">by {safe(artist)}</div>\n"
            "        <div class=\"links\">\n"
            f"            <a href=\"{safe(submission_links.get('atisket_link', '#'))}\">Submit via a-tisket</a>\n"
            f"            <a href=\"{safe(submission_links.get('harmony_link', '#'))}\">Submit via Harmony</a>\n"
            f"            <a href=\"{safe(submission_links.get('deezer_url', '#'))}\">View on Deezer</a>\n"
            "        </div>\n"
            "    </div>\n"
        )

    # Plain concatenation instead of str.format keeps the CSS braces literal.
    html_content = page_head + summary + "".join(cards) + "</body>\n</html>\n"

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"📄 HTML report saved to {output_file}")