- Created .gitignore to exclude unnecessary files and directories. - Added flake.nix for Nix package management and development environment setup. - Introduced flake.lock to lock dependencies for reproducibility. - Implemented main.py script to identify missing albums on MusicBrainz from Deezer releases for artists monitored in Lidarr, including functionality for generating submission links.
460 lines
17 KiB
Python
Executable File
460 lines
17 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Script to identify missing albums on MusicBrainz from Deezer releases
|
|
for artists monitored in Lidarr, and generate submission links.
|
|
|
|
This script:
|
|
1. Fetches artists from Lidarr with monitorNewItems set to "new" or "all"
|
|
2. Uses SAMBL to find albums missing on MusicBrainz from Deezer releases
|
|
3. Generates a-tisket/harmony links for submitting albums to MusicBrainz
|
|
"""
|
|
|
|
import requests
|
|
import json
|
|
import sys
|
|
import os
|
|
from typing import List, Dict, Optional
|
|
from urllib.parse import quote
|
|
from dotenv import load_dotenv
|
|
|
|
# Load environment variables from .env file
|
|
load_dotenv()
|
|
|
|
|
|
class LidarrClient:
    """Small client for the parts of the Lidarr v1 HTTP API this script needs."""

    # Seconds to wait for Lidarr before giving up. Without a timeout,
    # requests blocks indefinitely when the server stops responding.
    REQUEST_TIMEOUT = 30

    def __init__(self, base_url: str, api_key: str):
        """
        Args:
            base_url: Root URL of the Lidarr instance; a trailing slash is removed.
            api_key: API key, sent with every request in the X-Api-Key header.
        """
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.headers = {'X-Api-Key': api_key}

    def get_artists(self) -> List[Dict]:
        """
        Fetch all artists from Lidarr.

        Returns:
            The decoded JSON list from /api/v1/artist, or an empty list when
            the request fails or the response is not a JSON list. Failures
            are reported on stderr instead of being raised.
        """
        url = f"{self.base_url}/api/v1/artist"
        try:
            response = requests.get(
                url, headers=self.headers, timeout=self.REQUEST_TIMEOUT
            )
            response.raise_for_status()
            artists = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr)
            return []

        if not isinstance(artists, list):
            # An error object or other unexpected payload must not be
            # handed to callers as if it were the artist list.
            print(
                "Error fetching artists from Lidarr: unexpected response format",
                file=sys.stderr,
            )
            return []
        return artists

    def get_monitored_artists(self, monitor_types: Optional[List[str]] = None) -> List[Dict]:
        """
        Get artists whose monitorNewItems value is one of the given values.

        Args:
            monitor_types: monitorNewItems values to keep (default: ['new', 'all'])

        Returns:
            List of artist dictionaries matching the criteria, in the order
            returned by get_artists().
        """
        if monitor_types is None:
            monitor_types = ['new', 'all']

        return [
            artist for artist in self.get_artists()
            if isinstance(artist, dict)
            and artist.get('monitorNewItems') in monitor_types
        ]
|
|
|
|
|
|
class SamblClient:
    """
    Client for interacting with SAMBL API to find missing albums.

    SAMBL (Streaming Artist MusicBrainz Lookup) is available at:
    - Website: https://sambl.lioncat6.com
    - GitHub: https://github.com/Lioncat6/SAMBL-React
    - API Root: https://sambl.lioncat6.com/api/
    """

    # Substrings of an album's 'status' value that mark it as not linked to
    # MusicBrainz ('orange' is the status this script expects SAMBL to use
    # for unlinked albums).
    _MISSING_STATUS_MARKERS = ('missing', 'not_found', 'not_linked', 'orange')

    def __init__(self, base_url: Optional[str] = None):
        """
        Args:
            base_url: Root URL of a SAMBL instance. Falls back to the public
                instance when omitted or empty; a trailing slash is removed.
        """
        self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip('/')

    def _search_deezer_artist(self, artist_name: str) -> Optional[str]:
        """
        Search for an artist on Deezer and return their Deezer ID.
        Uses Deezer API directly since SAMBL's searchArtists endpoint is unsupported.

        Only the first search hit is considered.

        Args:
            artist_name: Name of the artist to search for

        Returns:
            Deezer artist ID as string, or None if not found or on error
        """
        try:
            response = requests.get(
                "https://api.deezer.com/search/artist",
                params={'q': artist_name, 'limit': 1},
                timeout=10,
            )
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a body that is not valid JSON.
            print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
            return None

        results = data.get('data') if isinstance(data, dict) else None
        first_hit = results[0] if isinstance(results, list) and results else None
        if not isinstance(first_hit, dict) or first_hit.get('id') is None:
            print(f" [Sambl] ⚠️ Artist '{artist_name}' not found on Deezer")
            return None

        artist_id = str(first_hit['id'])
        print(f" [Sambl] Found Deezer artist ID: {artist_id}")
        return artist_id

    @staticmethod
    def _extract_albums(data) -> List:
        """Pull the album list out of a decoded SAMBL response.

        The response schema is not pinned down by this script, so three
        layouts are accepted: a bare list, {'albums': [...]} and
        {'data': [...]}. Anything else yields an empty list.
        """
        if isinstance(data, list):
            return data
        if isinstance(data, dict):
            albums = data.get('albums', [])
            if not albums and isinstance(data.get('data'), list):
                albums = data['data']
            return albums if isinstance(albums, list) else []
        return []

    @classmethod
    def _is_missing(cls, album: Dict) -> bool:
        """Return True when an album entry looks unlinked on MusicBrainz.

        An album counts as missing when its status contains one of the
        known markers, or when neither MusicBrainz ID field has a value.
        An album is linked as soon as either 'musicbrainz_id' or 'mbid' is
        set; requiring both would flag almost every album as missing.
        """
        status = str(album.get('status', '')).lower()
        if any(marker in status for marker in cls._MISSING_STATUS_MARKERS):
            return True
        return not (album.get('musicbrainz_id') or album.get('mbid'))

    def _parse_missing_albums(self, data, artist_name: str) -> List[Dict]:
        """Convert a decoded SAMBL response into the list of missing albums.

        Entries that are not dictionaries, that are linked to MusicBrainz,
        or that carry no usable Deezer ID are skipped.
        """
        missing_albums = []
        for album in self._extract_albums(data):
            if not isinstance(album, dict) or not self._is_missing(album):
                continue

            deezer_id = str(
                album.get('id') or album.get('deezer_id') or album.get('deezerId') or ''
            )
            if not deezer_id or deezer_id == 'None':
                # Without an ID no Deezer URL can be built for submission.
                continue

            missing_albums.append({
                'title': album.get('title') or album.get('name') or 'Unknown Title',
                'deezer_url': f"https://www.deezer.com/album/{deezer_id}",
                'deezer_id': deezer_id,
                'release_date': (
                    album.get('release_date')
                    or album.get('releaseDate')
                    or album.get('release')
                    or ''
                ),
                'artist_name': artist_name,
                'cover_url': (
                    album.get('cover')
                    or album.get('cover_medium')
                    or album.get('coverUrl')
                    or ''
                ),
            })
        return missing_albums

    def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]:
        """
        Find albums missing on MusicBrainz from Deezer releases for an artist.

        Looks up the artist's Deezer ID by name, then calls SAMBL's
        /api/compareArtistAlbums endpoint and keeps the albums that appear
        not to be linked to MusicBrainz.

        Args:
            artist_mbid: MusicBrainz ID of the artist
            artist_name: Name of the artist

        Returns:
            List of album dictionaries with Deezer URLs and metadata; empty
            when the artist is not found on Deezer or a request/parse error
            occurs (errors are reported on stderr, not raised).
            Format:
            [
                {
                    'title': 'Album Title',
                    'deezer_url': 'https://www.deezer.com/album/123456789',
                    'deezer_id': '123456789',
                    'release_date': '2024-01-01',
                    'artist_name': artist_name,
                    'cover_url': ''
                }
            ]
        """
        print(f" [Sambl] Checking for missing albums for {artist_name} (MBID: {artist_mbid})")

        # SAMBL is queried by provider ID, so resolve the Deezer artist first.
        deezer_artist_id = self._search_deezer_artist(artist_name)
        if not deezer_artist_id:
            return []

        params = {
            'provider_id': deezer_artist_id,
            'provider': 'deezer',
            'mbid': artist_mbid,
            'full': 'true',  # Get full information including missing albums
        }
        try:
            response = requests.get(
                f"{self.base_url}/api/compareArtistAlbums", params=params, timeout=30
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
            return []

        try:
            missing_albums = self._parse_missing_albums(response.json(), artist_name)
        except (KeyError, ValueError, TypeError, AttributeError) as e:
            print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
            print(f" [Sambl] Response: {response.text[:200]}", file=sys.stderr)
            return []

        if missing_albums:
            print(f" [Sambl] ✓ Found {len(missing_albums)} missing album(s)")
        else:
            print(f" [Sambl] ✓ No missing albums found")
        return missing_albums
|
|
|
|
|
|
class SubmissionLinkGenerator:
    """Builds a-tisket and Harmony submission URLs for a Deezer release URL."""

    @staticmethod
    def generate_atisket_link(deezer_url: str) -> str:
        """Return the a-tisket URL with the fully percent-encoded Deezer URL as `url`."""
        return "https://atisket.pulsewidth.org.uk/?url=" + quote(deezer_url, safe='')

    @staticmethod
    def generate_harmony_link(deezer_url: str) -> str:
        """Return the Harmony URL with the fully percent-encoded Deezer URL as `url`."""
        return "https://harmony.pulsewidth.org.uk/?url=" + quote(deezer_url, safe='')

    @staticmethod
    def generate_links(deezer_url: str) -> Dict[str, str]:
        """Return the original Deezer URL together with both submission links."""
        links = {'deezer_url': deezer_url}
        links['atisket_link'] = SubmissionLinkGenerator.generate_atisket_link(deezer_url)
        links['harmony_link'] = SubmissionLinkGenerator.generate_harmony_link(deezer_url)
        return links
|
|
|
|
|
|
def _parse_max_artists(raw_value: Optional[str], default: int = 5) -> int:
    """Parse the MAX_ARTISTS setting; unset or blank means `default`.

    Raises:
        ValueError: if the value is present but not an integer.
    """
    if raw_value is None or not raw_value.strip():
        return default
    return int(raw_value.strip())


def main():
    """
    Main execution function.

    Reads configuration from the environment (LIDARR_URL, LIDARR_API_KEY,
    optional SAMBL_URL and MAX_ARTISTS), looks up missing albums for each
    monitored Lidarr artist, prints submission links, and writes
    missing_albums.json plus an HTML report when anything is missing.

    Exits with status 1 when required configuration is absent or
    MAX_ARTISTS is not an integer.
    """
    # Configuration - loaded from .env file or environment variables
    lidarr_url = os.getenv("LIDARR_URL")
    lidarr_api_key = os.getenv("LIDARR_API_KEY")
    sambl_url = os.getenv("SAMBL_URL") or None  # Set if Sambl has a web API

    # Limit number of artists to process (default: 5); zero or a negative
    # value disables the limit.
    try:
        max_artists = _parse_max_artists(os.getenv("MAX_ARTISTS"))
    except ValueError:
        print(
            f"Error: MAX_ARTISTS must be an integer (got {os.getenv('MAX_ARTISTS')!r}).",
            file=sys.stderr,
        )
        sys.exit(1)

    # Validate required configuration
    if not lidarr_url:
        print("Error: LIDARR_URL not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    if not lidarr_api_key:
        print("Error: LIDARR_API_KEY not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    # Initialize clients
    lidarr = LidarrClient(lidarr_url, lidarr_api_key)
    sambl = SamblClient(sambl_url)

    print("Fetching monitored artists from Lidarr...")
    artists = lidarr.get_monitored_artists(['new', 'all'])

    if not artists:
        print("No artists found with monitorNewItems set to 'new' or 'all'")
        return

    total_artists = len(artists)
    limited = max_artists > 0 and total_artists > max_artists

    # Limit the number of artists for testing
    if limited:
        print(f"Found {total_artists} monitored artists (limiting to {max_artists} for testing)")
        artists = artists[:max_artists]
    else:
        print(f"Found {total_artists} monitored artists")
    print("\n" + "="*80)

    all_missing_albums = []

    for artist in artists:
        artist_name = artist.get('artistName', 'Unknown')
        artist_mbid = artist.get('foreignArtistId') or artist.get('mbid')

        if not artist_mbid:
            print(f"\n⚠️ Skipping {artist_name} - no MusicBrainz ID found")
            continue

        print(f"\n🎵 Artist: {artist_name}")
        print(f" MusicBrainz ID: {artist_mbid}")

        # Find missing albums using Sambl
        missing_albums = sambl.find_missing_albums(artist_mbid, artist_name)

        if not missing_albums:
            print(f" ✓ No missing albums found")
            continue

        print(f" Found {len(missing_albums)} missing album(s):")
        for album in missing_albums:
            deezer_url = album.get('deezer_url')
            if not deezer_url:
                # Nothing to submit without a Deezer URL.
                continue

            links = SubmissionLinkGenerator.generate_links(deezer_url)
            album['submission_links'] = links
            all_missing_albums.append(album)

            print(f" 📀 {album.get('title', 'Unknown Title')}")
            print(f" Deezer: {deezer_url}")
            print(f" a-tisket: {links['atisket_link']}")
            print(f" Harmony: {links['harmony_link']}")

    # Generate summary report
    print("\n" + "="*80)
    print(f"\n📊 Summary:")
    print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if limited else ""))
    print(f" Total missing albums found: {len(all_missing_albums)}")

    if not all_missing_albums:
        print("\n✨ All albums are already on MusicBrainz!")
        return

    # Save results to JSON file
    output_file = "missing_albums.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(all_missing_albums, f, indent=2, ensure_ascii=False)
    print(f"\n💾 Results saved to {output_file}")

    # Generate HTML report with clickable links
    generate_html_report(all_missing_albums)
|
|
|
|
|
|
def generate_html_report(albums: List[Dict]) -> None:
    """
    Generate an HTML report with clickable submission links.

    Writes missing_albums.html (UTF-8) to the current working directory
    and prints a confirmation line.

    Args:
        albums: Album dictionaries. The keys read are 'title',
            'artist_name' and 'submission_links' (a dict with
            'atisket_link', 'harmony_link' and 'deezer_url'); missing keys
            fall back to placeholder text or '#'.
    """
    # Local import keeps this function self-contained without touching the
    # file's import header.
    from html import escape

    def text(value) -> str:
        # Titles, names and URLs originate from external APIs, so they are
        # escaped before being placed in markup or attribute values.
        return escape(str(value), quote=True)

    # Static part of the page. It is deliberately NOT run through
    # str.format(): the CSS braces would be parsed as replacement fields.
    page_head = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Missing Albums - MusicBrainz Submission Links</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        h1 {
            color: #333;
            border-bottom: 3px solid #4CAF50;
            padding-bottom: 10px;
        }
        .album {
            background: white;
            border-radius: 8px;
            padding: 20px;
            margin: 20px 0;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .album-title {
            font-size: 1.5em;
            font-weight: bold;
            color: #2196F3;
            margin-bottom: 10px;
        }
        .artist-name {
            color: #666;
            margin-bottom: 15px;
        }
        .links {
            display: flex;
            gap: 10px;
            flex-wrap: wrap;
        }
        .link-button {
            display: inline-block;
            padding: 10px 20px;
            background-color: #4CAF50;
            color: white;
            text-decoration: none;
            border-radius: 5px;
            transition: background-color 0.3s;
        }
        .link-button:hover {
            background-color: #45a049;
        }
        .link-button.atisket {
            background-color: #2196F3;
        }
        .link-button.atisket:hover {
            background-color: #0b7dda;
        }
        .link-button.harmony {
            background-color: #FF9800;
        }
        .link-button.harmony:hover {
            background-color: #e68900;
        }
        .deezer-link {
            color: #666;
            font-size: 0.9em;
            margin-top: 10px;
        }
        .summary {
            background: white;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
    </style>
</head>
<body>
    <h1>🎵 Missing Albums - MusicBrainz Submission Links</h1>
"""

    summary_html = f"""    <div class="summary">
        <strong>Total missing albums: {len(albums)}</strong>
    </div>
"""

    # Per-album fragment; it contains no literal braces, so str.format()
    # is safe here. rel="noopener noreferrer" stops the opened page from
    # reaching back into the report via window.opener.
    album_template = """
    <div class="album">
        <div class="album-title">{title}</div>
        <div class="artist-name">by {artist}</div>
        <div class="links">
            <a href="{atisket_link}" target="_blank" rel="noopener noreferrer" class="link-button atisket">Submit via a-tisket</a>
            <a href="{harmony_link}" target="_blank" rel="noopener noreferrer" class="link-button harmony">Submit via Harmony</a>
        </div>
        <div class="deezer-link">
            <a href="{deezer_url}" target="_blank" rel="noopener noreferrer">View on Deezer</a>
        </div>
    </div>
"""

    album_sections = []
    for album in albums:
        submission_links = album.get('submission_links') or {}
        album_sections.append(album_template.format(
            title=text(album.get('title', 'Unknown Title')),
            artist=text(album.get('artist_name', 'Unknown Artist')),
            atisket_link=text(submission_links.get('atisket_link', '#')),
            harmony_link=text(submission_links.get('harmony_link', '#')),
            deezer_url=text(submission_links.get('deezer_url', '#')),
        ))

    page_tail = """
</body>
</html>
"""

    html_content = page_head + summary_html + "".join(album_sections) + page_tail

    output_file = "missing_albums.html"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"📄 HTML report saved to {output_file}")
|
|
|
|
|
|
# Run the report only when this file is executed as a script, so that
# importing the module has no side effects beyond its definitions.
if __name__ == "__main__":
    main()
|
|
|