From 51df3f15db44d9903ea55b13c7e81a28ff7c2c3c Mon Sep 17 00:00:00 2001
From: Danilo Reyes <danilo.reyes.251@proton.me>
Date: Tue, 11 Nov 2025 09:35:54 -0600
Subject: [PATCH] Add initial project files for MusicBrainz Missing Albums
 Finder

- Created .gitignore to exclude unnecessary files and directories.
- Added flake.nix for Nix package management and development environment setup.
- Introduced flake.lock to lock dependencies for reproducibility.
- Implemented main.py script to identify missing albums on MusicBrainz from Deezer releases for artists monitored in Lidarr, including functionality for generating submission links.
---
 .gitignore |  28 ++++
 flake.lock |  61 +++++++
 flake.nix  |  37 +++++
 main.py    | 459 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 585 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 flake.lock
 create mode 100644 flake.nix
 create mode 100755 main.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bfb027c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,28 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+ENV/
+.venv
+
+# Environment variables
+# Note: .env is ignored (see the `.env` entry at the end of this file) because
+# it holds the Lidarr API key; commit a template under a different name instead
+# (for example .env.example)
+
+# Output files
+missing_albums.json
+missing_albums.html
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.env
+
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..07d5f28
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,61 @@
+{
+  "nodes": {
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1731533236,
+        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1762596750,
+        "narHash": "sha256-rXXuz51Bq7DHBlfIjN7jO8Bu3du5TV+3DSADBX7/9YQ=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "b6a8526db03f735b89dd5ff348f53f752e7ddc8e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..c842f5c
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,37 @@
+{  # Nix flake: development shell and runnable package for main.py
+  description = "Lidarr to MusicBrainz Missing Albums Finder";
+
+  inputs = {
+    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";  # exact revision is pinned in flake.lock
+    flake-utils.url = "github:numtide/flake-utils";
+  };
+
+  outputs = { self, nixpkgs, flake-utils }:
+    flake-utils.lib.eachDefaultSystem (system:  # the function below receives each system name
+      let
+        pkgs = import nixpkgs { inherit system; };
+        pythonEnv = pkgs.python3.withPackages (ps: with ps; [  # third-party modules imported by main.py
+          requests
+          python-dotenv
+        ]);
+      in
+      {
+        devShells.default = pkgs.mkShell {  # interactive shell with pythonEnv available
+          buildInputs = [ pythonEnv ];
+          shellHook = ''
+            echo "Python environment ready!"
+            echo "Run: python main.py"
+          '';
+        };
+
+        packages.default = pkgs.writeShellApplication {  # wrapper script that runs main.py with pythonEnv
+          name = "lidarr-musicbrainz";
+          runtimeInputs = [ pythonEnv ];
+          text = ''
+            python ${./main.py} "$@"
+          '';
+        };
+      }
+    );
+}
+
diff --git a/main.py b/main.py
new file mode 100755
index 0000000..64f67ad
--- /dev/null
+++ b/main.py
@@ -0,0 +1,459 @@
+#!/usr/bin/env python3
+"""
+Script to identify missing albums on MusicBrainz from Deezer releases
+for artists monitored in Lidarr, and generate submission links.
+
+This script:
+1. Fetches artists from Lidarr with monitorNewItems set to "new" or "all"
+2. Uses SAMBL to find albums missing on MusicBrainz from Deezer releases
+3. Generates a-tisket/harmony links for submitting albums to MusicBrainz
+"""
+
+import requests
+import json
+import sys
+import os
+from typing import List, Dict, Optional
+from urllib.parse import quote
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+
+class LidarrClient:
+    """Minimal client for the Lidarr REST API (v1)."""
+
+    def __init__(self, base_url: str, api_key: str, timeout: float = 30):
+        """Store the base URL (trailing '/' stripped), API key and timeout."""
+        self.base_url = base_url.rstrip('/')
+        self.api_key = api_key
+        self.timeout = timeout
+        self.headers = {'X-Api-Key': api_key}
+
+    def get_artists(self) -> List[Dict]:
+        """Return every artist known to Lidarr, or [] if the request fails."""
+        url = f"{self.base_url}/api/v1/artist"
+        try:
+            # A timeout keeps the script from hanging forever on a dead server.
+            response = requests.get(url, headers=self.headers, timeout=self.timeout)
+            response.raise_for_status()
+            artists = response.json()
+        except (requests.exceptions.RequestException, ValueError) as e:
+            print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr)
+            return []
+        # Anything but a JSON array (e.g. an error object) is treated as empty.
+        return artists if isinstance(artists, list) else []
+
+    def get_monitored_artists(self, monitor_types: Optional[List[str]] = None) -> List[Dict]:
+        """
+        Get artists whose monitorNewItems value is one of ``monitor_types``.
+
+        Args:
+            monitor_types: monitorNewItems values to keep (default: ['new', 'all'])
+
+        Returns:
+            List of artist dictionaries matching the criteria
+        """
+        if monitor_types is None:
+            monitor_types = ['new', 'all']
+        artists = self.get_artists()
+        return [a for a in artists if a.get('monitorNewItems') in monitor_types]
+
+
+class SamblClient:
+    """
+    Client for interacting with SAMBL API to find missing albums.
+
+    SAMBL (Streaming Artist MusicBrainz Lookup) is available at:
+    - Website: https://sambl.lioncat6.com
+    - GitHub: https://github.com/Lioncat6/SAMBL-React
+    - API Root: https://sambl.lioncat6.com/api/
+    """
+
+    # Substrings of an album's 'status' that mark it as not linked to MusicBrainz.
+    # NOTE(review): heuristic list - confirm against real SAMBL responses.
+    _MISSING_STATUS_MARKERS = ('missing', 'not_found', 'not_linked', 'orange')
+
+    def __init__(self, base_url: Optional[str] = None):
+        """Use ``base_url``, or the public SAMBL instance when it is empty."""
+        self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip('/')
+
+    def _search_deezer_artist(self, artist_name: str) -> Optional[str]:
+        """
+        Search for an artist on Deezer and return their Deezer ID.
+        Uses Deezer API directly since SAMBL's searchArtists endpoint is unsupported.
+
+        Only the first search hit is used, so an ambiguous name may resolve
+        to a different artist than the one Lidarr monitors.
+
+        Args:
+            artist_name: Name of the artist to search for
+
+        Returns:
+            Deezer artist ID as string, or None if not found or on error
+        """
+        try:
+            params = {'q': artist_name, 'limit': 1}
+            response = requests.get("https://api.deezer.com/search/artist", params=params, timeout=10)
+            response.raise_for_status()
+            payload = response.json()
+            hits = payload.get('data') if isinstance(payload, dict) else None
+            artist_id = str(hits[0]['id']) if hits else None
+        except requests.exceptions.RequestException as e:
+            print(f"  [Sambl] ⚠️  Error searching Deezer for artist: {e}", file=sys.stderr)
+            return None
+        except (KeyError, IndexError, TypeError, ValueError) as e:
+            # Malformed body (no 'id', wrong types, invalid JSON): treat as a miss.
+            print(f"  [Sambl] ⚠️  Unexpected Deezer search response: {e}", file=sys.stderr)
+            return None
+        if artist_id is None:
+            print(f"  [Sambl] ⚠️  Artist '{artist_name}' not found on Deezer")
+            return None
+        print(f"  [Sambl] Found Deezer artist ID: {artist_id}")
+        return artist_id
+
+    @staticmethod
+    def _is_missing(album: Dict) -> bool:
+        """Return True when ``album`` appears to have no MusicBrainz release linked."""
+        status = str(album.get('status', '')).lower()
+        if any(marker in status for marker in SamblClient._MISSING_STATUS_MARKERS):
+            return True
+        # An album is missing only when *neither* ID field holds a value. Checking
+        # each field on its own flagged every album that used just one of the keys.
+        return not (album.get('musicbrainz_id') or album.get('mbid'))
+
+    @staticmethod
+    def _extract_missing_albums(data, artist_name: str) -> List[Dict]:
+        """
+        Pick the missing albums out of a decoded compareArtistAlbums response.
+
+        Args:
+            data: Decoded JSON; a list of albums, or a dict holding the list
+                under 'albums' or 'data' (the exact shape is not guaranteed)
+            artist_name: Name copied into every returned entry
+
+        Returns:
+            List of album dictionaries (format shown on find_missing_albums)
+        """
+        albums = []
+        if isinstance(data, dict):
+            albums = data.get('albums') or []
+            if not albums and isinstance(data.get('data'), list):
+                albums = data['data']
+        elif isinstance(data, list):
+            albums = data
+
+        missing_albums = []
+        for album in albums:
+            # Skip entries that are not objects instead of crashing on .get().
+            if not isinstance(album, dict) or not SamblClient._is_missing(album):
+                continue
+            deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
+            if not deezer_id or deezer_id == 'None':
+                continue  # Without an ID no Deezer URL can be built
+            missing_albums.append({
+                'title': album.get('title') or album.get('name') or 'Unknown Title',
+                'deezer_url': f"https://www.deezer.com/album/{deezer_id}",
+                'deezer_id': deezer_id,
+                'release_date': album.get('release_date') or album.get('releaseDate') or album.get('release') or '',
+                'artist_name': artist_name,
+                'cover_url': album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or ''
+            })
+        return missing_albums
+
+    def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]:
+        """
+        Find albums missing on MusicBrainz from Deezer releases for an artist.
+
+        Uses SAMBL's /api/compareArtistAlbums endpoint which compares albums
+        from Deezer with MusicBrainz and identifies missing ones.
+
+        Args:
+            artist_mbid: MusicBrainz ID of the artist
+            artist_name: Name of the artist
+
+        Returns:
+            List of album dictionaries ([] on any error), each shaped like:
+                {
+                    'title': 'Album Title',
+                    'deezer_url': 'https://www.deezer.com/album/123456789',
+                    'deezer_id': '123456789',
+                    'release_date': '2024-01-01',
+                    'artist_name': artist_name,
+                    'cover_url': 'https://...'  # '' when unknown
+                }
+        """
+        print(f"  [Sambl] Checking for missing albums for {artist_name} (MBID: {artist_mbid})")
+
+        # SAMBL is queried by provider ID, so resolve the Deezer artist first.
+        deezer_artist_id = self._search_deezer_artist(artist_name)
+        if not deezer_artist_id:
+            return []
+
+        params = {
+            'provider_id': deezer_artist_id,
+            'provider': 'deezer',
+            'mbid': artist_mbid,
+            'full': 'true'  # Get full information including missing albums
+        }
+        response = None  # Stays None if the request itself fails
+        try:
+            response = requests.get(f"{self.base_url}/api/compareArtistAlbums", params=params, timeout=30)
+            response.raise_for_status()
+            missing_albums = self._extract_missing_albums(response.json(), artist_name)
+        except requests.exceptions.RequestException as e:
+            print(f"  [Sambl] ⚠️  Error calling SAMBL API: {e}", file=sys.stderr)
+            return []
+        except (KeyError, ValueError, TypeError) as e:
+            print(f"  [Sambl] ⚠️  Error parsing SAMBL response: {e}", file=sys.stderr)
+            print(f"  [Sambl] Response: {response.text[:200] if response is not None else 'N/A'}", file=sys.stderr)
+            return []
+        if missing_albums:
+            print(f"  [Sambl] ✓ Found {len(missing_albums)} missing album(s)")
+        else:
+            print(f"  [Sambl] ✓ No missing albums found")
+        return missing_albums
+
+
+class SubmissionLinkGenerator:
+    """Build MusicBrainz submission links (a-tisket and Harmony) for a Deezer album."""
+
+    @staticmethod
+    def generate_atisket_link(deezer_url: str) -> str:
+        """Return an a-tisket link carrying ``deezer_url`` as its 'url' parameter."""
+        # safe='' also encodes '/' and ':' so the URL survives as one query value.
+        return f"https://atisket.pulsewidth.org.uk/?url={quote(deezer_url, safe='')}"
+
+    @staticmethod
+    def generate_harmony_link(deezer_url: str) -> str:
+        """Return a Harmony release-lookup link for ``deezer_url``."""
+        # Harmony serves lookups from /release; the site root only shows the form.
+        return f"https://harmony.pulsewidth.org.uk/release?url={quote(deezer_url, safe='')}"
+
+    @staticmethod
+    def generate_links(deezer_url: str) -> Dict[str, str]:
+        """Return the Deezer URL together with both submission links."""
+        return {
+            'deezer_url': deezer_url,
+            'atisket_link': SubmissionLinkGenerator.generate_atisket_link(deezer_url),
+            'harmony_link': SubmissionLinkGenerator.generate_harmony_link(deezer_url)
+        }
+
+
+def main():
+    """
+    Load settings from the environment/.env, query Lidarr and SAMBL, print
+    submission links and write the JSON/HTML reports. Exits 1 on bad settings.
+    """
+    LIDARR_URL = os.getenv("LIDARR_URL")
+    LIDARR_API_KEY = os.getenv("LIDARR_API_KEY")
+    SAMBL_URL = os.getenv("SAMBL_URL") or None  # Empty means the public SAMBL instance
+
+    # Validate required configuration
+    for name, value in (("LIDARR_URL", LIDARR_URL), ("LIDARR_API_KEY", LIDARR_API_KEY)):
+        if not value:
+            print(f"Error: {name} not set. Please set it in .env file or environment variables.", file=sys.stderr)
+            sys.exit(1)
+
+    # Limit number of artists to process (default: 5; 0 or less disables the limit).
+    # Blank falls back to the default; a non-integer is reported, not a traceback.
+    try:
+        MAX_ARTISTS = int(os.getenv("MAX_ARTISTS") or "5")
+    except ValueError:
+        print("Error: MAX_ARTISTS must be an integer.", file=sys.stderr)
+        sys.exit(1)
+
+    lidarr = LidarrClient(LIDARR_URL, LIDARR_API_KEY)
+    sambl = SamblClient(SAMBL_URL)
+
+    print("Fetching monitored artists from Lidarr...")
+    artists = lidarr.get_monitored_artists(['new', 'all'])
+    if not artists:
+        print("No artists found with monitorNewItems set to 'new' or 'all'")
+        return
+
+    total_artists = len(artists)
+    limited = MAX_ARTISTS > 0 and total_artists > MAX_ARTISTS
+    if limited:
+        print(f"Found {total_artists} monitored artists (limiting to {MAX_ARTISTS} for testing)")
+        artists = artists[:MAX_ARTISTS]
+    else:
+        print(f"Found {total_artists} monitored artists")
+    print("\n" + "="*80)
+
+    all_missing_albums = []
+    for artist in artists:
+        artist_name = artist.get('artistName', 'Unknown')
+        artist_mbid = artist.get('foreignArtistId') or artist.get('mbid')
+        if not artist_mbid:
+            print(f"\n⚠️  Skipping {artist_name} - no MusicBrainz ID found")
+            continue
+
+        print(f"\n🎵 Artist: {artist_name}")
+        print(f"   MusicBrainz ID: {artist_mbid}")
+
+        missing_albums = sambl.find_missing_albums(artist_mbid, artist_name)
+        if not missing_albums:
+            print(f"   ✓ No missing albums found")
+            continue
+
+        print(f"   Found {len(missing_albums)} missing album(s):")
+        for album in missing_albums:
+            deezer_url = album.get('deezer_url')
+            if not deezer_url:
+                continue
+            links = SubmissionLinkGenerator.generate_links(deezer_url)
+            album['submission_links'] = links
+            all_missing_albums.append(album)
+            print(f"   📀 {album.get('title', 'Unknown Title')}")
+            print(f"      Deezer: {deezer_url}")
+            print(f"      a-tisket: {links['atisket_link']}")
+            print(f"      Harmony: {links['harmony_link']}")
+
+    # Generate summary report
+    print("\n" + "="*80)
+    print(f"\n📊 Summary:")
+    print(f"   Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if limited else ""))
+    print(f"   Total missing albums found: {len(all_missing_albums)}")
+
+    if not all_missing_albums:
+        print("\n✨ All albums are already on MusicBrainz!")
+        return
+
+    # Save results to JSON file
+    output_file = "missing_albums.json"
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(all_missing_albums, f, indent=2, ensure_ascii=False)
+    print(f"\n💾 Results saved to {output_file}")
+    # Generate HTML report with clickable links
+    generate_html_report(all_missing_albums)
+
+
+def generate_html_report(albums: List[Dict]):
+    """Write missing_albums.html (current directory) with clickable submission links per album."""
+    from html import escape  # album data comes from external APIs; escape it before embedding
+    html_content = """<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Missing Albums - MusicBrainz Submission Links</title>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
+            max-width: 1200px;
+            margin: 0 auto;
+            padding: 20px;
+            background-color: #f5f5f5;
+        }
+        h1 {
+            color: #333;
+            border-bottom: 3px solid #4CAF50;
+            padding-bottom: 10px;
+        }
+        .album {
+            background: white;
+            border-radius: 8px;
+            padding: 20px;
+            margin: 20px 0;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        }
+        .album-title {
+            font-size: 1.5em;
+            font-weight: bold;
+            color: #2196F3;
+            margin-bottom: 10px;
+        }
+        .artist-name {
+            color: #666;
+            margin-bottom: 15px;
+        }
+        .links {
+            display: flex;
+            gap: 10px;
+            flex-wrap: wrap;
+        }
+        .link-button {
+            display: inline-block;
+            padding: 10px 20px;
+            background-color: #4CAF50;
+            color: white;
+            text-decoration: none;
+            border-radius: 5px;
+            transition: background-color 0.3s;
+        }
+        .link-button:hover {
+            background-color: #45a049;
+        }
+        .link-button.atisket {
+            background-color: #2196F3;
+        }
+        .link-button.atisket:hover {
+            background-color: #0b7dda;
+        }
+        .link-button.harmony {
+            background-color: #FF9800;
+        }
+        .link-button.harmony:hover {
+            background-color: #e68900;
+        }
+        .deezer-link {
+            color: #666;
+            font-size: 0.9em;
+            margin-top: 10px;
+        }
+        .summary {
+            background: white;
+            padding: 15px;
+            border-radius: 8px;
+            margin-bottom: 20px;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        }
+    </style>
+</head>
+<body>
+    <h1>🎵 Missing Albums - MusicBrainz Submission Links</h1>
+    <div class="summary">
+        <strong>Total missing albums: {count}</strong>
+    </div>
+"""
+
+    album_html = """
+    <div class="album">
+        <div class="album-title">{title}</div>
+        <div class="artist-name">by {artist}</div>
+        <div class="links">
+            <a href="{atisket_link}" target="_blank" class="link-button atisket">Submit via a-tisket</a>
+            <a href="{harmony_link}" target="_blank" class="link-button harmony">Submit via Harmony</a>
+        </div>
+        <div class="deezer-link">
+            <a href="{deezer_url}" target="_blank">View on Deezer</a>
+        </div>
+    </div>
+"""
+
+    parts = []
+    for album in albums:
+        submission_links = album.get('submission_links', {})
+        parts.append(album_html.format(
+            title=escape(str(album.get('title', 'Unknown Title'))),
+            artist=escape(str(album.get('artist_name', 'Unknown Artist'))),
+            atisket_link=escape(str(submission_links.get('atisket_link') or '#')),
+            harmony_link=escape(str(submission_links.get('harmony_link') or '#')),
+            deezer_url=escape(str(submission_links.get('deezer_url') or '#')),
+        ))
+    # Plain replace, not str.format(): the CSS braces above would be parsed as fields.
+    html_content = html_content.replace("{count}", str(len(albums))) + "".join(parts) + """
+</body>
+</html>
+"""
+    output_file = "missing_albums.html"
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write(html_content)
+    print(f"📄 HTML report saved to {output_file}")
+
+
+if __name__ == "__main__":
+    main()
+