Add initial project files for MusicBrainz Missing Albums Finder
- Created .gitignore to exclude unnecessary files and directories.
- Added flake.nix for Nix package management and development environment setup.
- Introduced flake.lock to lock dependencies for reproducibility.
- Implemented main.py script to identify missing albums on MusicBrainz from Deezer releases for artists monitored in Lidarr, including functionality for generating submission links.
This commit is contained in:
459
main.py
Executable file
459
main.py
Executable file
@@ -0,0 +1,459 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to identify missing albums on MusicBrainz from Deezer releases
|
||||
for artists monitored in Lidarr, and generate submission links.
|
||||
|
||||
This script:
|
||||
1. Fetches artists from Lidarr with monitorNewItems set to "new" or "all"
|
||||
2. Uses SAMBL to find albums missing on MusicBrainz from Deezer releases
|
||||
3. Generates a-tisket/harmony links for submitting albums to MusicBrainz
|
||||
"""
|
||||
|
||||
import requests
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
from typing import List, Dict, Optional
|
||||
from urllib.parse import quote
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class LidarrClient:
    """Client for interacting with Lidarr API"""

    # Seconds to wait for Lidarr to answer. Without a timeout a stalled
    # server would block the whole script indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, base_url: str, api_key: str):
        """
        Store the connection details for a Lidarr instance.

        Args:
            base_url: Root URL of the Lidarr instance (trailing slashes are stripped)
            api_key: Lidarr API key, sent in the X-Api-Key header
        """
        self.base_url = base_url.rstrip('/')
        self.api_key = api_key
        self.headers = {'X-Api-Key': api_key}

    def get_artists(self) -> List[Dict]:
        """
        Fetch all artists from Lidarr.

        Returns:
            The decoded JSON body of GET /api/v1/artist, or an empty list
            when the request fails or the body is not valid JSON
        """
        url = f"{self.base_url}/api/v1/artist"
        try:
            response = requests.get(url, headers=self.headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decode failures on requests versions
            # whose JSONDecodeError is not a RequestException.
            print(f"Error fetching artists from Lidarr: {e}", file=sys.stderr)
            return []

    def get_monitored_artists(self, monitor_types: Optional[List[str]] = None) -> List[Dict]:
        """
        Get artists with monitorNewItems set to specified values

        Args:
            monitor_types: List of monitorNewItems values to filter by (default: ['new', 'all'])

        Returns:
            List of artist dictionaries matching the criteria
        """
        if monitor_types is None:
            monitor_types = ['new', 'all']

        return [
            artist for artist in self.get_artists()
            if artist.get('monitorNewItems') in monitor_types
        ]
|
||||
|
||||
|
||||
class SamblClient:
    """
    Client for interacting with SAMBL API to find missing albums.

    SAMBL (Streaming Artist MusicBrainz Lookup) is available at:
    - Website: https://sambl.lioncat6.com
    - GitHub: https://github.com/Lioncat6/SAMBL-React
    - API Root: https://sambl.lioncat6.com/api/
    """

    # Substrings of an album's 'status' value that mark it as missing from,
    # or not linked on, MusicBrainz.
    _MISSING_STATUS_MARKERS = ('missing', 'not_found', 'not_linked', 'orange')

    def __init__(self, base_url: Optional[str] = None):
        """
        Args:
            base_url: Root URL of a SAMBL instance; defaults to the public instance
        """
        # Default to the public SAMBL instance if no URL provided
        self.base_url = (base_url or "https://sambl.lioncat6.com").rstrip('/')

    def _search_deezer_artist(self, artist_name: str) -> Optional[str]:
        """
        Search for an artist on Deezer and return their Deezer ID.
        Uses Deezer API directly since SAMBL's searchArtists endpoint is unsupported.

        Only the first search result is considered.

        Args:
            artist_name: Name of the artist to search for

        Returns:
            Deezer artist ID as string, or None if not found
        """
        try:
            # Use Deezer API to search for the artist
            deezer_search_url = "https://api.deezer.com/search/artist"
            params = {'q': artist_name, 'limit': 1}
            response = requests.get(deezer_search_url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f" [Sambl] ⚠️ Error searching Deezer for artist: {e}", file=sys.stderr)
            return None

        # Validate the payload shape instead of indexing blindly, so an
        # unexpected body is reported as "not found" rather than raising.
        results = data.get('data') if isinstance(data, dict) else None
        first = results[0] if isinstance(results, list) and results else None
        if isinstance(first, dict) and first.get('id') is not None:
            artist_id = str(first['id'])
            print(f" [Sambl] Found Deezer artist ID: {artist_id}")
            return artist_id

        print(f" [Sambl] ⚠️ Artist '{artist_name}' not found on Deezer")
        return None

    @staticmethod
    def _extract_albums(data) -> List[Dict]:
        """Pull the album list out of a decoded response, dropping non-dict entries."""
        if isinstance(data, list):
            albums = data
        elif isinstance(data, dict):
            # Check for common response structures
            albums = data.get('albums') or []
            if not albums and isinstance(data.get('data'), list):
                albums = data['data']
        else:
            albums = []

        if not isinstance(albums, list):
            return []
        return [album for album in albums if isinstance(album, dict)]

    @classmethod
    def _is_missing(cls, album: Dict) -> bool:
        """Return True when an album entry looks absent from (or unlinked on) MusicBrainz."""
        status = str(album.get('status', '')).lower()
        if any(marker in status for marker in cls._MISSING_STATUS_MARKERS):
            return True
        # The MusicBrainz ID may be reported under either key. The album only
        # counts as missing when NEITHER is populated; requiring both would
        # flag practically every album.
        return not (album.get('musicbrainz_id') or album.get('mbid'))

    @staticmethod
    def _build_entry(album: Dict, artist_name: str) -> Optional[Dict]:
        """Convert a raw album entry into the output format, or None if it has no Deezer ID."""
        deezer_id = str(album.get('id') or album.get('deezer_id') or album.get('deezerId') or '')
        if not deezer_id or deezer_id == 'None':
            return None

        return {
            'title': album.get('title') or album.get('name') or 'Unknown Title',
            'deezer_url': f"https://www.deezer.com/album/{deezer_id}",
            'deezer_id': deezer_id,
            'release_date': album.get('release_date') or album.get('releaseDate') or album.get('release') or '',
            'artist_name': artist_name,
            'cover_url': album.get('cover') or album.get('cover_medium') or album.get('coverUrl') or ''
        }

    def find_missing_albums(self, artist_mbid: str, artist_name: str) -> List[Dict]:
        """
        Find albums missing on MusicBrainz from Deezer releases for an artist.

        Uses SAMBL's /api/compareArtistAlbums endpoint which compares albums
        from Deezer with MusicBrainz and identifies missing ones.

        Args:
            artist_mbid: MusicBrainz ID of the artist
            artist_name: Name of the artist

        Returns:
            List of album dictionaries with Deezer URLs and metadata
            (empty when the artist is not found on Deezer or a request fails).
            Format:
            [
                {
                    'title': 'Album Title',
                    'deezer_url': 'https://www.deezer.com/album/123456789',
                    'deezer_id': '123456789',
                    'release_date': '2024-01-01',
                    'artist_name': artist_name,
                    'cover_url': 'https://...'
                }
            ]
        """
        print(f" [Sambl] Checking for missing albums for {artist_name} (MBID: {artist_mbid})")

        # First, we need to find the Deezer artist ID
        deezer_artist_id = self._search_deezer_artist(artist_name)
        if not deezer_artist_id:
            return []

        # Now use SAMBL's compareArtistAlbums endpoint
        api_url = f"{self.base_url}/api/compareArtistAlbums"
        params = {
            'provider_id': deezer_artist_id,
            'provider': 'deezer',
            'mbid': artist_mbid,
            'full': 'true'  # Get full information including missing albums
        }

        response = None
        try:
            response = requests.get(api_url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f" [Sambl] ⚠️ Error calling SAMBL API: {e}", file=sys.stderr)
            return []
        except ValueError as e:
            # Body was not valid JSON; show a snippet to help diagnose it.
            print(f" [Sambl] ⚠️ Error parsing SAMBL response: {e}", file=sys.stderr)
            print(f" [Sambl] Response: {response.text[:200] if response is not None else 'N/A'}", file=sys.stderr)
            return []

        # The response structure may vary, so the helpers handle several formats
        missing_albums = []
        for album in self._extract_albums(data):
            if not self._is_missing(album):
                continue
            entry = self._build_entry(album, artist_name)
            if entry is not None:
                missing_albums.append(entry)

        if missing_albums:
            print(f" [Sambl] ✓ Found {len(missing_albums)} missing album(s)")
        else:
            print(f" [Sambl] ✓ No missing albums found")

        return missing_albums
|
||||
|
||||
|
||||
class SubmissionLinkGenerator:
    """Generate submission links for MusicBrainz using a-tisket and Harmony"""

    @staticmethod
    def generate_atisket_link(deezer_url: str) -> str:
        """
        Generate an a-tisket submission link from a Deezer URL

        Args:
            deezer_url: Deezer album URL; it is fully percent-encoded into the query string

        Returns:
            a-tisket URL carrying the Deezer URL in its `url` parameter
        """
        # NOTE(review): a-tisket's lookup form appears to use per-vendor
        # parameters (e.g. a Deezer-specific one) - confirm that `url` is
        # honoured before relying on these links.
        encoded_url = quote(deezer_url, safe='')
        return f"https://atisket.pulsewidth.org.uk/?url={encoded_url}"

    @staticmethod
    def generate_harmony_link(deezer_url: str) -> str:
        """
        Generate a Harmony submission link from a Deezer URL

        Args:
            deezer_url: Deezer album URL; it is fully percent-encoded into the query string

        Returns:
            Harmony release-lookup URL for the given Deezer album
        """
        encoded_url = quote(deezer_url, safe='')
        # Target the /release lookup route: the site root is only the landing
        # page, so a link to it would not open the lookup for this album.
        return f"https://harmony.pulsewidth.org.uk/release?url={encoded_url}"

    @staticmethod
    def generate_links(deezer_url: str) -> Dict[str, str]:
        """
        Generate both a-tisket and Harmony links

        Returns:
            Dict with keys 'deezer_url', 'atisket_link' and 'harmony_link'
        """
        return {
            'deezer_url': deezer_url,
            'atisket_link': SubmissionLinkGenerator.generate_atisket_link(deezer_url),
            'harmony_link': SubmissionLinkGenerator.generate_harmony_link(deezer_url)
        }
|
||||
|
||||
|
||||
def main():
    """
    Main execution function

    Reads LIDARR_URL, LIDARR_API_KEY, SAMBL_URL and MAX_ARTISTS from the
    environment, looks up missing albums for each monitored artist, prints
    the submission links, and writes missing_albums.json plus the HTML
    report when anything was found. Exits with status 1 when a required
    setting is absent.
    """
    # Configuration - loaded from .env file or environment variables
    lidarr_url = os.getenv("LIDARR_URL")
    lidarr_api_key = os.getenv("LIDARR_API_KEY")
    sambl_url = os.getenv("SAMBL_URL") or None  # Set if Sambl has a web API

    # Limit number of artists to process (default: 5). An empty or
    # non-numeric value falls back to the default instead of crashing.
    raw_max_artists = os.getenv("MAX_ARTISTS", "5")
    try:
        max_artists = int(raw_max_artists)
    except ValueError:
        print(f"Warning: MAX_ARTISTS={raw_max_artists!r} is not an integer; using default of 5.", file=sys.stderr)
        max_artists = 5

    # Validate required configuration
    if not lidarr_url:
        print("Error: LIDARR_URL not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    if not lidarr_api_key:
        print("Error: LIDARR_API_KEY not set. Please set it in .env file or environment variables.", file=sys.stderr)
        sys.exit(1)

    # Initialize clients
    lidarr = LidarrClient(lidarr_url, lidarr_api_key)
    sambl = SamblClient(sambl_url)

    print("Fetching monitored artists from Lidarr...")
    artists = lidarr.get_monitored_artists(['new', 'all'])

    if not artists:
        print("No artists found with monitorNewItems set to 'new' or 'all'")
        return

    total_artists = len(artists)

    # Limit the number of artists for testing; a value <= 0 disables the limit
    limited = max_artists > 0 and total_artists > max_artists
    if limited:
        print(f"Found {total_artists} monitored artists (limiting to {max_artists} for testing)")
        artists = artists[:max_artists]
    else:
        print(f"Found {total_artists} monitored artists")
    print("\n" + "="*80)

    all_missing_albums = []

    for artist in artists:
        artist_name = artist.get('artistName', 'Unknown')
        artist_mbid = artist.get('foreignArtistId') or artist.get('mbid')

        if not artist_mbid:
            print(f"\n⚠️ Skipping {artist_name} - no MusicBrainz ID found")
            continue

        print(f"\n🎵 Artist: {artist_name}")
        print(f" MusicBrainz ID: {artist_mbid}")

        # Find missing albums using Sambl
        missing_albums = sambl.find_missing_albums(artist_mbid, artist_name)

        if not missing_albums:
            print(f" ✓ No missing albums found")
            continue

        print(f" Found {len(missing_albums)} missing album(s):")
        for album in missing_albums:
            deezer_url = album.get('deezer_url')
            if not deezer_url:
                # Without a Deezer URL there is nothing to submit
                continue

            links = SubmissionLinkGenerator.generate_links(deezer_url)
            album['submission_links'] = links
            all_missing_albums.append(album)

            print(f" 📀 {album.get('title', 'Unknown Title')}")
            print(f" Deezer: {deezer_url}")
            print(f" a-tisket: {links['atisket_link']}")
            print(f" Harmony: {links['harmony_link']}")

    # Generate summary report
    print("\n" + "="*80)
    print(f"\n📊 Summary:")
    print(f" Artists processed: {len(artists)}" + (f" (of {total_artists} total)" if limited else ""))
    print(f" Total missing albums found: {len(all_missing_albums)}")

    # Save results to JSON file
    if all_missing_albums:
        output_file = "missing_albums.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_missing_albums, f, indent=2, ensure_ascii=False)
        print(f"\n💾 Results saved to {output_file}")

        # Generate HTML report with clickable links
        generate_html_report(all_missing_albums)
    else:
        print("\n✨ All albums are already on MusicBrainz!")
|
||||
|
||||
|
||||
def generate_html_report(albums: List[Dict], output_file: str = "missing_albums.html") -> None:
    """
    Generate an HTML report with clickable submission links

    Args:
        albums: Album dictionaries; each may carry 'title', 'artist_name'
            and a 'submission_links' dict with 'atisket_link',
            'harmony_link' and 'deezer_url'. Absent values fall back to
            'Unknown Title', 'Unknown Artist' and '#'.
        output_file: Path of the HTML file to write (default: missing_albums.html)
    """
    # Imported here so the function does not rely on a module-level import.
    from html import escape

    # Static page header. It is concatenated as-is and never passed through
    # str.format(): the CSS braces would be parsed as replacement fields and
    # raise KeyError.
    page_head = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Missing Albums - MusicBrainz Submission Links</title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        h1 {
            color: #333;
            border-bottom: 3px solid #4CAF50;
            padding-bottom: 10px;
        }
        .album {
            background: white;
            border-radius: 8px;
            padding: 20px;
            margin: 20px 0;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .album-title {
            font-size: 1.5em;
            font-weight: bold;
            color: #2196F3;
            margin-bottom: 10px;
        }
        .artist-name {
            color: #666;
            margin-bottom: 15px;
        }
        .links {
            display: flex;
            gap: 10px;
            flex-wrap: wrap;
        }
        .link-button {
            display: inline-block;
            padding: 10px 20px;
            background-color: #4CAF50;
            color: white;
            text-decoration: none;
            border-radius: 5px;
            transition: background-color 0.3s;
        }
        .link-button:hover {
            background-color: #45a049;
        }
        .link-button.atisket {
            background-color: #2196F3;
        }
        .link-button.atisket:hover {
            background-color: #0b7dda;
        }
        .link-button.harmony {
            background-color: #FF9800;
        }
        .link-button.harmony:hover {
            background-color: #e68900;
        }
        .deezer-link {
            color: #666;
            font-size: 0.9em;
            margin-top: 10px;
        }
        .summary {
            background: white;
            padding: 15px;
            border-radius: 8px;
            margin-bottom: 20px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
    </style>
</head>
<body>
    <h1>🎵 Missing Albums - MusicBrainz Submission Links</h1>
"""

    summary_html = f"""    <div class="summary">
        <strong>Total missing albums: {len(albums)}</strong>
    </div>
"""

    # Per-album card. This template has no literal braces, so str.format()
    # is safe here.
    album_template = """
    <div class="album">
        <div class="album-title">{title}</div>
        <div class="artist-name">by {artist}</div>
        <div class="links">
            <a href="{atisket_link}" target="_blank" class="link-button atisket">Submit via a-tisket</a>
            <a href="{harmony_link}" target="_blank" class="link-button harmony">Submit via Harmony</a>
        </div>
        <div class="deezer-link">
            <a href="{deezer_url}" target="_blank">View on Deezer</a>
        </div>
    </div>
"""

    cards = []
    for album in albums:
        submission_links = album.get('submission_links', {})
        # Titles, names and URLs come from external APIs, so escape them
        # before placing them in markup or attribute values.
        cards.append(album_template.format(
            title=escape(str(album.get('title', 'Unknown Title'))),
            artist=escape(str(album.get('artist_name', 'Unknown Artist'))),
            atisket_link=escape(str(submission_links.get('atisket_link', '#'))),
            harmony_link=escape(str(submission_links.get('harmony_link', '#'))),
            deezer_url=escape(str(submission_links.get('deezer_url', '#')))
        ))

    html_content = page_head + summary_html + "".join(cards) + """
</body>
</html>
"""

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(html_content)
    print(f"📄 HTML report saved to {output_file}")
|
||||
|
||||
|
||||
# Run the finder only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user