Add initial project files for MusicBrainz Missing Albums Finder

- Created .gitignore to exclude unnecessary files and directories.
- Added flake.nix for Nix package management and development environment setup.
- Introduced flake.lock to lock dependencies for reproducibility.
- Implemented main.py script to identify missing albums on MusicBrainz from Deezer releases for artists monitored in Lidarr, including functionality for generating submission links.
This commit is contained in:
Danilo Reyes
2025-11-11 09:35:54 -06:00
commit 51df3f15db
4 changed files with 585 additions and 0 deletions

459
main.py Executable file
View File

@@ -0,0 +1,459 @@
#!/usr/bin/env python3
"""
Script to identify missing albums on MusicBrainz from Deezer releases
for artists monitored in Lidarr, and generate submission links.
This script:
1. Fetches artists from Lidarr with monitorNewItems set to "new" or "all"
2. Uses SAMBL to find albums missing on MusicBrainz from Deezer releases
3. Generates a-tisket/harmony links for submitting albums to MusicBrainz
"""
import requests
import json
import sys
import os
from typing import List, Dict, Optional
from urllib.parse import quote
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
class LidarrClient:
    """Client for interacting with the Lidarr API.

    Args:
        base_url: Root URL of the Lidarr instance; a trailing slash is removed.
        api_key: Lidarr API key, sent with every request in the
            ``X-Api-Key`` header.
        timeout: Seconds to wait for Lidarr before a request is abandoned.
    """

    def __init__(self, base_url: str, api_key: str, timeout: float = 30):
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.headers = {'X-Api-Key': api_key}
        # Without a timeout, requests waits forever on an unresponsive server.
        self.timeout = timeout

    def get_artists(self) -> List[Dict]:
        """Fetch all artists from Lidarr.

        Returns:
            The decoded JSON list returned by ``/api/v1/artist``. An empty
            list is returned (and a message written to stderr) when the
            request fails, the body is not valid JSON, or the payload is not
            a list.
        """
        url = f"{self.base_url}/api/v1/artist"
        try:
            response = requests.get(url, headers=self.headers, timeout=self.timeout)
            response.raise_for_status()
            payload = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on requests versions
            # whose decode error is not a RequestException subclass.
            print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr)
            return []

        if not isinstance(payload, list):
            print(
                f"Error fetching artists from Lidarr: expected a list, got {type(payload).__name__}",
                file=sys.stderr,
            )
            return []
        return payload

    def get_monitored_artists(self, monitor_types: Optional[List[str]] = None) -> List[Dict]:
        """Get artists whose ``monitorNewItems`` value is one of *monitor_types*.

        Args:
            monitor_types: ``monitorNewItems`` values to keep
                (default: ``['new', 'all']``).

        Returns:
            List of artist dictionaries matching the criteria. Entries that
            are not dictionaries are ignored.
        """
        if monitor_types is None:
            monitor_types = ['new', 'all']
        return [
            artist for artist in self.get_artists()
            if isinstance(artist, dict) and artist.get('monitorNewItems') in monitor_types
        ]
class SamblClient:
    """
    Client for interacting with SAMBL API to find missing albums.

    SAMBL (Streaming Artist MusicBrainz Lookup) is available at:
    - Website: https://sambl.lioncat6.com
    - GitHub: https://github.com/Lioncat6/SAMBL-React
    - API Root: https://sambl.lioncat6.com/api/
    """

    # Substrings of a status field that mark an album as not linked to
    # MusicBrainz ('orange' per the original author's note on SAMBL's colours).
    _MISSING_MARKERS = ('missing', 'not_found', 'not_linked', 'orange')
    # Keys under which a response entry may carry its MusicBrainz ID.
    _MBID_KEYS = ('musicbrainz_id', 'mbid')

    def __init__(self, base_url: Optional[str] = None):
        # Default to the public SAMBL instance if no URL provided
        self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip('/')

    def _search_deezer_artist(self, artist_name: str) -> Optional[str]:
        """
        Search for an artist on Deezer and return their Deezer ID.

        Uses the Deezer API directly and takes the first search hit.

        Args:
            artist_name: Name of the artist to search for

        Returns:
            Deezer artist ID as string, or None if the request fails, the
            body is not valid JSON, or no usable hit is present.
        """
        try:
            response = requests.get(
                "https://api.deezer.com/search/artist",
                params={'q': artist_name, 'limit': 1},
                timeout=10,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
            return None

        # Walk the payload defensively: an unexpected shape means "not found"
        # rather than an uncaught KeyError/AttributeError.
        hits = data.get('data') if isinstance(data, dict) else None
        first_hit = hits[0] if isinstance(hits, list) and hits else None
        raw_id = first_hit.get('id') if isinstance(first_hit, dict) else None
        if raw_id is None:
            print(f" [Sambl] ⚠️ Artist '{artist_name}' not found on Deezer")
            return None

        artist_id = str(raw_id)
        print(f" [Sambl] Found Deezer artist ID: {artist_id}")
        return artist_id

    def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]:
        """
        Find albums missing on MusicBrainz from Deezer releases for an artist.

        Looks up the Deezer artist ID by name, then queries SAMBL's
        /api/compareArtistAlbums endpoint and keeps the entries that look
        unlinked.

        Args:
            artist_mbid: MusicBrainz ID of the artist
            artist_name: Name of the artist

        Returns:
            List of album dictionaries with Deezer URLs and metadata; empty
            on any lookup, network or parsing failure.
            Format:
            [
                {
                    'title': 'Album Title',
                    'deezer_url': 'https://www.deezer.com/album/123456789',
                    'deezer_id': '123456789',
                    'release_date': '2024-01-01',
                    'artist_name': artist_name,
                    'cover_url': 'https://...'
                }
            ]
        """
        print(f" [Sambl] Checking for missing albums for {artist_name} (MBID: {artist_mbid})")

        # First, we need to find the Deezer artist ID
        deezer_artist_id = self._search_deezer_artist(artist_name)
        if not deezer_artist_id:
            return []

        # Now use SAMBL's compareArtistAlbums endpoint
        try:
            response = requests.get(
                f"{self.base_url}/api/compareArtistAlbums",
                params={
                    'provider_id': deezer_artist_id,
                    'provider': 'deezer',
                    'mbid': artist_mbid,
                    'full': 'true',  # Get full information including missing albums
                },
                timeout=30,
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
            return []

        try:
            data = response.json()
        except ValueError as e:
            print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
            print(f" [Sambl] Response: {response.text[:200]}", file=sys.stderr)
            return []

        missing_albums = self._extract_missing_albums(data, artist_name)
        if missing_albums:
            print(f" [Sambl] ✓ Found {len(missing_albums)} missing album(s)")
        else:
            print(f" [Sambl] ✓ No missing albums found")
        return missing_albums

    @classmethod
    def _extract_missing_albums(cls, data, artist_name: str) -> List[Dict]:
        """Turn a decoded SAMBL payload into the list of missing-album records."""
        records = []
        for album in cls._album_entries(data):
            # Skip malformed entries instead of aborting the whole artist.
            if not isinstance(album, dict) or not cls._is_missing(album):
                continue
            record = cls._to_missing_album(album, artist_name)
            if record is not None:
                records.append(record)
        return records

    @staticmethod
    def _album_entries(data) -> list:
        """Locate the album list in a payload that is a list or a dict wrapper."""
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            albums = data.get('albums', [])
            if not albums and isinstance(data.get('data'), list):
                albums = data['data']
            return albums if isinstance(albums, list) else []
        return []

    @classmethod
    def _is_missing(cls, album: Dict) -> bool:
        """Decide whether an album entry is not linked to MusicBrainz.

        An entry is missing when either status field contains one of the
        known markers, or when none of the MusicBrainz ID keys holds a
        value. Requiring *every* ID key to be populated (as a plain OR over
        "key is empty" checks does) would flag albums that are linked under
        only one of the key names.
        """
        status_text = ' '.join(
            str(album.get(key, '')).lower() for key in ('status', 'musicbrainz_status')
        )
        if any(marker in status_text for marker in cls._MISSING_MARKERS):
            return True
        return not any(album.get(key) for key in cls._MBID_KEYS)

    @staticmethod
    def _to_missing_album(album: Dict, artist_name: str) -> Optional[Dict]:
        """Build the output record for one album, or None without a Deezer ID."""
        deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
        if not deezer_id or deezer_id == 'None':
            return None
        return {
            'title': album.get('title') or album.get('name') or 'Unknown Title',
            'deezer_url': f"https://www.deezer.com/album/{deezer_id}",
            'deezer_id': deezer_id,
            'release_date': album.get('release_date') or album.get('releaseDate') or album.get('release') or '',
            'artist_name': artist_name,
            'cover_url': album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or '',
        }
class SubmissionLinkGenerator:
    """Generate submission links for MusicBrainz using a-tisket and Harmony."""

    @staticmethod
    def generate_atisket_link(deezer_url: str) -> str:
        """Generate an a-tisket submission link from a Deezer URL.

        The Deezer URL is fully percent-encoded (``safe=''``) so it survives
        as a single query-string value.
        """
        encoded_url = quote(deezer_url, safe='')
        return f"https://atisket.pulsewidth.org.uk/?url={encoded_url}"

    @staticmethod
    def generate_harmony_link(deezer_url: str) -> str:
        """Generate a Harmony submission link from a Deezer URL.

        The link targets Harmony's ``/release`` lookup route; the site root
        only shows the lookup form and does not act on a ``url`` parameter.
        """
        encoded_url = quote(deezer_url, safe='')
        return f"https://harmony.pulsewidth.org.uk/release?url={encoded_url}"

    @staticmethod
    def generate_links(deezer_url: str) -> Dict[str, str]:
        """Generate both a-tisket and Harmony links.

        Returns:
            Dict with keys ``deezer_url`` (the input, unchanged),
            ``atisket_link`` and ``harmony_link``.
        """
        return {
            'deezer_url': deezer_url,
            'atisket_link': SubmissionLinkGenerator.generate_atisket_link(deezer_url),
            'harmony_link': SubmissionLinkGenerator.generate_harmony_link(deezer_url),
        }
def main():
    """Main execution function.

    Reads LIDARR_URL, LIDARR_API_KEY, SAMBL_URL and MAX_ARTISTS from the
    environment, asks Lidarr for monitored artists, queries SAMBL for each
    artist's missing albums, prints submission links, and writes
    ``missing_albums.json`` plus an HTML report when anything was found.

    Exits with status 1 when a required setting is absent or MAX_ARTISTS is
    not an integer.
    """
    # Configuration - loaded from .env file or environment variables
    lidarr_url = os.getenv("LIDARR_URL")
    lidarr_api_key = os.getenv("LIDARR_API_KEY")
    sambl_url = os.getenv("SAMBL_URL") or None  # Set if Sambl has a web API

    # Limit number of artists to process (default: 5); values <= 0 disable the limit.
    raw_max_artists = os.getenv("MAX_ARTISTS", "5")
    try:
        max_artists = int(raw_max_artists)
    except ValueError:
        # An empty or non-numeric value would otherwise surface as a traceback.
        print(f"Error: MAX_ARTISTS must be an integer, got {raw_max_artists!r}.", file=sys.stderr)
        sys.exit(1)

    # Validate required configuration
    if not lidarr_url:
        print("Error: LIDARR_URL not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)
    if not lidarr_api_key:
        print("Error: LIDARR_API_KEY not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    # Initialize clients
    lidarr = LidarrClient(lidarr_url, lidarr_api_key)
    sambl = SamblClient(sambl_url)

    print("Fetching monitored artists from Lidarr...")
    artists = lidarr.get_monitored_artists(['new', 'all'])
    if not artists:
        print("No artists found with monitorNewItems set to 'new' or 'all'")
        return

    total_artists = len(artists)
    # Limit the number of artists for testing
    limited = max_artists > 0 and total_artists > max_artists
    if limited:
        print(f"Found {total_artists} monitored artists (limiting to {max_artists} for testing)")
        artists = artists[:max_artists]
    else:
        print(f"Found {total_artists} monitored artists")
    print("\n" + "="*80)

    all_missing_albums = []
    for artist in artists:
        artist_name = artist.get('artistName', 'Unknown')
        artist_mbid = artist.get('foreignArtistId') or artist.get('mbid')
        if not artist_mbid:
            print(f"\n⚠️ Skipping {artist_name} - no MusicBrainz ID found")
            continue

        print(f"\n🎵 Artist: {artist_name}")
        print(f" MusicBrainz ID: {artist_mbid}")

        # Find missing albums using Sambl
        missing_albums = sambl.find_missing_albums(artist_mbid, artist_name)
        if not missing_albums:
            print(f" ✓ No missing albums found")
            continue

        print(f" Found {len(missing_albums)} missing album(s):")
        for album in missing_albums:
            deezer_url = album.get('deezer_url')
            if not deezer_url:
                continue
            links = SubmissionLinkGenerator.generate_links(deezer_url)
            album['submission_links'] = links
            all_missing_albums.append(album)
            print(f" 📀 {album.get('title', 'Unknown Title')}")
            print(f" Deezer: {deezer_url}")
            print(f" a-tisket: {links['atisket_link']}")
            print(f" Harmony: {links['harmony_link']}")

    # Generate summary report
    print("\n" + "="*80)
    print(f"\n📊 Summary:")
    print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if limited else ""))
    print(f" Total missing albums found: {len(all_missing_albums)}")

    if not all_missing_albums:
        print("\n✨ All albums are already on MusicBrainz!")
        return

    # Save results to JSON file
    output_file = "missing_albums.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_missing_albums, f, indent=2, ensure_ascii=False)
    print(f"\n💾 Results saved to {output_file}")

    # Generate HTML report with clickable links
    generate_html_report(all_missing_albums)
def generate_html_report(albums: List[Dict]):
    """Generate an HTML report with clickable submission links.

    Writes ``missing_albums.html`` in the current working directory.

    Args:
        albums: Album dictionaries. The keys read are ``title``,
            ``artist_name`` and ``submission_links`` (itself a dict with
            ``atisket_link``, ``harmony_link`` and ``deezer_url``); absent
            keys fall back to placeholder text or ``#``.
    """
    # Imported locally so this function does not rely on the file's import block.
    from html import escape

    def safe(value) -> str:
        # Album metadata comes from external APIs; escape it for both text
        # and double-quoted attribute contexts.
        return escape(str(value), quote=True)

    # The stylesheet is full of literal braces, so this chunk is kept as a
    # plain string and never passed through str.format().
    page_head = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Missing Albums - MusicBrainz Submission Links</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        h1 {
            color: #333;
            border-bottom: 3px solid #4CAF50;
            padding-bottom: 10px;
        }
        .album {
            background: white;
            border-radius: 8px;
            padding: 20px;
            margin: 20px 0;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .album-title {
            font-size: 1.5em;
            font-weight: bold;
            color: #2196F3;
            margin-bottom: 10px;
        }
        .artist-name {
            color: #666;
            margin-bottom: 15px;
        }
        .links {
            display: flex;
            gap: 10px;
            flex-wrap: wrap;
        }
        .link-button {
            display: inline-block;
            padding: 10px 20px;
            background-color: #4CAF50;
            color: white;
            text-decoration: none;
            border-radius: 5px;
            transition: background-color 0.3s;
        }
        .link-button:hover {
            background-color: #45a049;
        }
        .link-button.atisket {
            background-color: #2196F3;
        }
        .link-button.atisket:hover {
            background-color: #0b7dda;
        }
        .link-button.harmony {
            background-color: #FF9800;
        }
        .link-button.harmony:hover {
            background-color: #e68900;
        }
        .deezer-link {
            color: #666;
            font-size: 0.9em;
            margin-top: 10px;
        }
        .summary {
            background: white;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
    </style>
</head>
<body>
    <h1>🎵 Missing Albums - MusicBrainz Submission Links</h1>
"""

    summary = f"""    <div class="summary">
        <strong>Total missing albums: {len(albums)}</strong>
    </div>
"""

    sections = []
    for album in albums:
        submission_links = album.get('submission_links', {})
        title = safe(album.get('title', 'Unknown Title'))
        artist = safe(album.get('artist_name', 'Unknown Artist'))
        atisket_link = safe(submission_links.get('atisket_link', '#'))
        harmony_link = safe(submission_links.get('harmony_link', '#'))
        deezer_url = safe(submission_links.get('deezer_url', '#'))
        sections.append(f"""
    <div class="album">
        <div class="album-title">{title}</div>
        <div class="artist-name">by {artist}</div>
        <div class="links">
            <a href="{atisket_link}" target="_blank" class="link-button atisket">Submit via a-tisket</a>
            <a href="{harmony_link}" target="_blank" class="link-button harmony">Submit via Harmony</a>
        </div>
        <div class="deezer-link">
            <a href="{deezer_url}" target="_blank">View on Deezer</a>
        </div>
    </div>
""")

    page_tail = """
</body>
</html>
"""
    html_content = page_head + summary + "".join(sections) + page_tail

    output_file = "missing_albums.html"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"📄 HTML report saved to {output_file}")
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()