# lidarr-mb-gap/main.py

#!/usr/bin/env python3
"""
Script to identify missing albums on MusicBrainz from Deezer releases
for artists monitored in Lidarr, and generate submission links.

This script:
1. Fetches artists from Lidarr with monitorNewItems set to "new" or "all"
2. Uses SAMBL to find albums missing on MusicBrainz from Deezer releases
3. Generates a-tisket/harmony links for submitting albums to MusicBrainz
"""

import requests
import json
import sys
import os
from typing import List, Dict, Optional
from urllib.parse import quote
from dotenv import load_dotenv

# Load environment variables from a .env file so that main() can read its
# configuration (LIDARR_URL, LIDARR_API_KEY, SAMBL_URL, MAX_ARTISTS) via os.getenv.
load_dotenv()


class LidarrClient:
    """Client for interacting with Lidarr API.

    Every request is sent with the ``X-Api-Key`` header and a timeout so that
    an unresponsive server cannot hang the script indefinitely.
    """

    def __init__(self, base_url: str, api_key: str, timeout: float = 30):
        """
        Args:
            base_url: Root URL of the Lidarr instance; a trailing slash is stripped.
            api_key: Lidarr API key, sent as the ``X-Api-Key`` header.
            timeout: Seconds to wait for Lidarr before giving up (default: 30).
        """
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.headers = {'X-Api-Key': api_key}
        self.timeout = timeout

    def get_artists(self) -> List[Dict]:
        """
        Fetch all artists from Lidarr.

        Returns:
            The decoded JSON list from ``/api/v1/artist``, or an empty list if
            the request fails, the body is not valid JSON, or the payload is
            not a list. Failures are reported on stderr rather than raised.
        """
        url = f"{self.base_url}/api/v1/artist"
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable body on requests versions whose
            # JSON error is not a RequestException subclass.
            print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr)
            return []

        if not isinstance(payload, list):
            # Anything other than a list (e.g. an error object) would break callers
            # that iterate over artist dictionaries.
            print(
                f"Error fetching artists from Lidarr: unexpected response type {type(payload).__name__}",
                file=sys.stderr,
            )
            return []
        return payload

    def get_monitored_artists(self, monitor_types: Optional[List[str]] = None) -> List[Dict]:
        """
        Get artists with monitorNewItems set to specified values

        Args:
            monitor_types: List of monitorNewItems values to filter by (default: ['new', 'all'])

        Returns:
            List of artist dictionaries matching the criteria. Entries that are
            not dictionaries are skipped.
        """
        if monitor_types is None:
            monitor_types = ['new', 'all']

        return [
            artist for artist in self.get_artists()
            if isinstance(artist, dict) and artist.get('monitorNewItems') in monitor_types
        ]


class SamblClient:
    """
    Client for interacting with SAMBL API to find missing albums.

    SAMBL (Streaming Artist MusicBrainz Lookup) is available at:
    - Website: https://sambl.lioncat6.com
    - GitHub: https://github.com/Lioncat6/SAMBL-React
    - API Root: https://sambl.lioncat6.com/api/
    """

    # Substrings of an album's ``status`` value that are treated as
    # "not on / not linked in MusicBrainz".
    _MISSING_STATUS_MARKERS = ('missing', 'not_found', 'not_linked', 'orange')

    def __init__(self, base_url: Optional[str] = None):
        """
        Args:
            base_url: Root URL of a SAMBL instance; defaults to the public
                instance when omitted or empty. A trailing slash is stripped.
        """
        self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip('/')

    def _search_deezer_artist(self, artist_name: str) -> Optional[str]:
        """
        Search for an artist on Deezer and return their Deezer ID.
        Uses Deezer API directly since SAMBL's searchArtists endpoint is unsupported.

        Only the first search hit is considered (``limit=1``).

        Args:
            artist_name: Name of the artist to search for

        Returns:
            Deezer artist ID as string, or None if not found or on any error
        """
        try:
            response = requests.get(
                "https://api.deezer.com/search/artist",
                params={'q': artist_name, 'limit': 1},
                timeout=10,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"  [Sambl] ⚠️  Error searching Deezer for artist: {e}", file=sys.stderr)
            return None

        hits = data.get('data') if isinstance(data, dict) else None
        first_hit = hits[0] if isinstance(hits, list) and hits else None
        if isinstance(first_hit, dict) and first_hit.get('id') is not None:
            artist_id = str(first_hit['id'])
            print(f"  [Sambl] Found Deezer artist ID: {artist_id}")
            return artist_id

        print(f"  [Sambl] ⚠️  Artist '{artist_name}' not found on Deezer")
        return None

    @staticmethod
    def _extract_albums(data) -> List[Dict]:
        """Return the album dictionaries found in a decoded response.

        Accepts a bare list, or a dict holding the list under ``albums`` or
        ``data``. Entries that are not dictionaries are dropped.
        """
        albums = []
        if isinstance(data, list):
            albums = data
        elif isinstance(data, dict):
            albums = data.get('albums') or []
            if not albums and isinstance(data.get('data'), list):
                albums = data['data']
        if not isinstance(albums, list):
            return []
        return [album for album in albums if isinstance(album, dict)]

    @staticmethod
    def _is_missing(album: Dict) -> bool:
        """Tell whether an album entry should be reported as missing.

        An album is missing when its ``status`` contains one of the known
        markers, or when it carries no MusicBrainz ID under either
        ``musicbrainz_id`` or ``mbid``. The two keys are alternatives: an ID
        under one of them is enough to count the album as present.
        """
        status = str(album.get('status', '')).lower()
        if any(marker in status for marker in SamblClient._MISSING_STATUS_MARKERS):
            return True
        return not (album.get('musicbrainz_id') or album.get('mbid'))

    @staticmethod
    def _format_missing_album(album: Dict, artist_name: str) -> Optional[Dict]:
        """Build the result entry for one album, or None if it has no usable Deezer ID."""
        raw_id = album.get('id') or album.get('deezer_id') or album.get('deezerId')
        deezer_id = str(raw_id) if raw_id else ''
        if not deezer_id or deezer_id == 'None':
            return None

        return {
            'title': album.get('title') or album.get('name') or 'Unknown Title',
            'deezer_url': f"https://www.deezer.com/album/{deezer_id}",
            'deezer_id': deezer_id,
            'release_date': album.get('release_date') or album.get('releaseDate') or album.get('release') or '',
            'artist_name': artist_name,
            'cover_url': album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or '',
        }

    def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]:
        """
        Find albums missing on MusicBrainz from Deezer releases for an artist.

        Uses SAMBL's /api/compareArtistAlbums endpoint which compares albums
        from Deezer with MusicBrainz and identifies missing ones.

        Args:
            artist_mbid: MusicBrainz ID of the artist
            artist_name: Name of the artist

        Returns:
            List of album dictionaries with Deezer URLs and metadata; empty
            when the artist is not found on Deezer or any request/parse step
            fails (failures are reported on stderr, not raised).
            Format:
            [
                {
                    'title': 'Album Title',
                    'deezer_url': 'https://www.deezer.com/album/123456789',
                    'deezer_id': '123456789',
                    'release_date': '2024-01-01',
                    'artist_name': artist_name,
                    'cover_url': 'https://...'
                }
            ]
        """
        print(f"  [Sambl] Checking for missing albums for {artist_name} (MBID: {artist_mbid})")

        # First, we need to find the Deezer artist ID
        deezer_artist_id = self._search_deezer_artist(artist_name)
        if not deezer_artist_id:
            return []

        # Now use SAMBL's compareArtistAlbums endpoint
        api_url = f"{self.base_url}/api/compareArtistAlbums"
        params = {
            'provider_id': deezer_artist_id,
            'provider': 'deezer',
            'mbid': artist_mbid,
            'full': 'true'  # Get full information including missing albums
        }
        try:
            response = requests.get(api_url, params=params, timeout=30)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"  [Sambl] ⚠️  Error calling SAMBL API: {e}", file=sys.stderr)
            return []

        # Decoding is handled separately from transport so a bad body is
        # reported as a parse error together with a snippet of the response.
        try:
            data = response.json()
        except ValueError as e:
            print(f"  [Sambl] ⚠️  Error parsing SAMBL response: {e}", file=sys.stderr)
            print(f"  [Sambl] Response: {response.text[:200]}", file=sys.stderr)
            return []

        missing_albums = []
        for album in self._extract_albums(data):
            if not self._is_missing(album):
                continue
            entry = self._format_missing_album(album, artist_name)
            if entry is not None:
                missing_albums.append(entry)

        if missing_albums:
            print(f"  [Sambl] ✓ Found {len(missing_albums)} missing album(s)")
        else:
            print("  [Sambl] ✓ No missing albums found")

        return missing_albums


class SubmissionLinkGenerator:
    """Builds a-tisket and Harmony submission URLs for a Deezer release URL.

    All methods are static; the class only groups the link builders.
    """

    @staticmethod
    def generate_atisket_link(deezer_url: str) -> str:
        """Return the a-tisket URL with *deezer_url* percent-encoded as its ``url`` query value."""
        # safe='' makes quote() encode '/' and ':' as well, so the whole
        # Deezer URL travels as a single query value.
        return "https://atisket.pulsewidth.org.uk/?url=" + quote(deezer_url, safe='')

    @staticmethod
    def generate_harmony_link(deezer_url: str) -> str:
        """Return the Harmony URL with *deezer_url* percent-encoded as its ``url`` query value."""
        return "https://harmony.pulsewidth.org.uk/?url=" + quote(deezer_url, safe='')

    @staticmethod
    def generate_links(deezer_url: str) -> Dict[str, str]:
        """Return a dict holding the original Deezer URL plus both submission links.

        Keys: ``deezer_url``, ``atisket_link``, ``harmony_link``.
        """
        builder = SubmissionLinkGenerator
        links = {'deezer_url': deezer_url}
        links['atisket_link'] = builder.generate_atisket_link(deezer_url)
        links['harmony_link'] = builder.generate_harmony_link(deezer_url)
        return links


def main():
    """Main execution function.

    Reads its configuration from environment variables:

    - ``LIDARR_URL`` (required): base URL of the Lidarr instance.
    - ``LIDARR_API_KEY`` (required): Lidarr API key.
    - ``SAMBL_URL`` (optional): SAMBL base URL; empty/unset uses the client default.
    - ``MAX_ARTISTS`` (optional): integer cap on artists processed (default 5);
      zero or a negative value disables the cap.

    Prints progress to stdout. When missing albums are found, writes
    ``missing_albums.json`` and generates the HTML report.

    Exits with status 1 if a required setting is absent or ``MAX_ARTISTS`` is
    not an integer.
    """
    lidarr_url = os.getenv("LIDARR_URL")
    lidarr_api_key = os.getenv("LIDARR_API_KEY")
    sambl_url = os.getenv("SAMBL_URL") or None  # Set if Sambl has a web API
    # An empty MAX_ARTISTS is treated like an unset one, mirroring SAMBL_URL above.
    raw_max_artists = os.getenv("MAX_ARTISTS") or "5"

    # Validate required configuration
    if not lidarr_url:
        print("Error: LIDARR_URL not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    if not lidarr_api_key:
        print("Error: LIDARR_API_KEY not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    # Report a malformed limit the same way as the other settings instead of
    # letting int() abort the script with a traceback.
    try:
        max_artists = int(raw_max_artists)
    except ValueError:
        print(f"Error: MAX_ARTISTS must be an integer, got {raw_max_artists!r}.", file=sys.stderr)
        sys.exit(1)

    # Initialize clients
    lidarr = LidarrClient(lidarr_url, lidarr_api_key)
    sambl = SamblClient(sambl_url)

    print("Fetching monitored artists from Lidarr...")
    artists = lidarr.get_monitored_artists(['new', 'all'])

    if not artists:
        print("No artists found with monitorNewItems set to 'new' or 'all'")
        return

    total_artists = len(artists)
    limited = max_artists > 0 and total_artists > max_artists

    # Limit the number of artists for testing
    if limited:
        print(f"Found {total_artists} monitored artists (limiting to {max_artists} for testing)")
        artists = artists[:max_artists]
    else:
        print(f"Found {total_artists} monitored artists")
    print("\n" + "="*80)

    all_missing_albums = []

    for artist in artists:
        artist_name = artist.get('artistName', 'Unknown')
        artist_mbid = artist.get('foreignArtistId') or artist.get('mbid')

        if not artist_mbid:
            print(f"\n⚠️  Skipping {artist_name} - no MusicBrainz ID found")
            continue

        print(f"\n🎵 Artist: {artist_name}")
        print(f"   MusicBrainz ID: {artist_mbid}")

        # Find missing albums using Sambl
        missing_albums = sambl.find_missing_albums(artist_mbid, artist_name)

        if not missing_albums:
            print("   ✓ No missing albums found")
            continue

        print(f"   Found {len(missing_albums)} missing album(s):")
        for album in missing_albums:
            deezer_url = album.get('deezer_url')
            if not deezer_url:
                continue

            links = SubmissionLinkGenerator.generate_links(deezer_url)
            album['submission_links'] = links
            all_missing_albums.append(album)

            print(f"   📀 {album.get('title', 'Unknown Title')}")
            print(f"      Deezer: {deezer_url}")
            print(f"      a-tisket: {links['atisket_link']}")
            print(f"      Harmony: {links['harmony_link']}")

    # Generate summary report
    print("\n" + "="*80)
    print("\n📊 Summary:")
    print(f"   Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if limited else ""))
    print(f"   Total missing albums found: {len(all_missing_albums)}")

    # Save results to JSON file
    if all_missing_albums:
        output_file = "missing_albums.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_missing_albums, f, indent=2, ensure_ascii=False)
        print(f"\n💾 Results saved to {output_file}")

        # Generate HTML report with clickable links
        generate_html_report(all_missing_albums)
    else:
        print("\n✨ All albums are already on MusicBrainz!")


def generate_html_report(albums: List[Dict], output_file: str = "missing_albums.html"):
    """Generate an HTML report with clickable submission links.

    Args:
        albums: Album dictionaries. Each may carry ``title``, ``artist_name``
            and a ``submission_links`` dict with ``atisket_link``,
            ``harmony_link`` and ``deezer_url``; absent or empty values fall
            back to placeholders.
        output_file: Path of the HTML file to write (default:
            ``missing_albums.html`` in the current directory).

    All album-derived text and URLs are HTML-escaped before being written,
    since they originate from external APIs.
    """
    from html import escape  # function-scope import keeps this block self-contained

    html_content = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Missing Albums - MusicBrainz Submission Links</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        h1 {
            color: #333;
            border-bottom: 3px solid #4CAF50;
            padding-bottom: 10px;
        }
        .album {
            background: white;
            border-radius: 8px;
            padding: 20px;
            margin: 20px 0;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .album-title {
            font-size: 1.5em;
            font-weight: bold;
            color: #2196F3;
            margin-bottom: 10px;
        }
        .artist-name {
            color: #666;
            margin-bottom: 15px;
        }
        .links {
            display: flex;
            gap: 10px;
            flex-wrap: wrap;
        }
        .link-button {
            display: inline-block;
            padding: 10px 20px;
            background-color: #4CAF50;
            color: white;
            text-decoration: none;
            border-radius: 5px;
            transition: background-color 0.3s;
        }
        .link-button:hover {
            background-color: #45a049;
        }
        .link-button.atisket {
            background-color: #2196F3;
        }
        .link-button.atisket:hover {
            background-color: #0b7dda;
        }
        .link-button.harmony {
            background-color: #FF9800;
        }
        .link-button.harmony:hover {
            background-color: #e68900;
        }
        .deezer-link {
            color: #666;
            font-size: 0.9em;
            margin-top: 10px;
        }
        .summary {
            background: white;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
    </style>
</head>
<body>
    <h1>🎵 Missing Albums - MusicBrainz Submission Links</h1>
    <div class="summary">
        <strong>Total missing albums: {count}</strong>
    </div>
"""

    album_html = """
    <div class="album">
        <div class="album-title">{title}</div>
        <div class="artist-name">by {artist}</div>
        <div class="links">
            <a href="{atisket_link}" target="_blank" class="link-button atisket">Submit via a-tisket</a>
            <a href="{harmony_link}" target="_blank" class="link-button harmony">Submit via Harmony</a>
        </div>
        <div class="deezer-link">
            <a href="{deezer_url}" target="_blank">View on Deezer</a>
        </div>
    </div>
"""

    def _safe(value, fallback: str) -> str:
        """HTML-escape *value* (including quotes), using *fallback* when it is empty or None."""
        return escape(str(value or fallback))

    cards = []
    for album in albums:
        submission_links = album.get('submission_links') or {}
        cards.append(album_html.format(
            title=_safe(album.get('title'), 'Unknown Title'),
            artist=_safe(album.get('artist_name'), 'Unknown Artist'),
            atisket_link=_safe(submission_links.get('atisket_link'), '#'),
            harmony_link=_safe(submission_links.get('harmony_link'), '#'),
            deezer_url=_safe(submission_links.get('deezer_url'), '#'),
        ))

    # The page template contains literal CSS braces, which str.format() would
    # try to interpret as replacement fields; substitute the single
    # placeholder with a plain replace instead.
    page_head = html_content.replace("{count}", str(len(albums)))

    html_content = page_head + "".join(cards) + """
</body>
</html>
"""

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"📄 HTML report saved to {output_file}")


# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()