Files
lidarr-mb-gap/src-cleanup/main.py
Danilo Reyes af5a2bf825 Add audio verification and duplicate tracking features
- Integrated `plexapi` and `python-dotenv` as dependencies in `flake.nix` and `pyproject.toml` for enhanced functionality.
- Implemented new modules for audio verification and duplicate tracking, including `audio_verification.py`, `duplicate_finder.py`, and `track_verification.py`.
- Updated `main.py` to utilize the new modules for identifying and managing duplicate single tracks in Lidarr, with detailed logging and confidence scoring.
- Enhanced the `find_duplicate_singles` function to support audio verification results and metadata migration to Plex.
- Refactored existing code for improved structure and maintainability, ensuring better integration of new features.
2025-11-14 01:32:41 -06:00

244 lines
8.2 KiB
Python

#!/usr/bin/env python3
"""
lidarr_cleanup_singles
Identifies single-track releases that can safely be removed because
the same track already exists on a full album in Lidarr.
"""
import argparse
import logging
import os
import sys
from dotenv import load_dotenv
from duplicate_finder import build_album_track_map, find_duplicate_singles
from lidarr_client import fetch_all_artists, fetch_albums_for_artist
from plex_metadata import get_plex_server, migrate_plex_metadata
# Module-level logger; its level and handlers are configured by
# logging.basicConfig() inside main().
logger = logging.getLogger(__name__)

# Load environment variables (e.g. from a .env file) at import time so the
# os.getenv() lookups performed in main() can see them.
load_dotenv()
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser.

    The Lidarr URL, API key and Docker mount options default to the
    LIDARR_URL, LIDARR_API_KEY and DOCKER_MOUNT environment variables.
    """
    parser = argparse.ArgumentParser(
        description="Identify single tracks that are duplicates of album tracks in Lidarr."
    )
    parser.add_argument(
        "--base-url",
        default=os.getenv("LIDARR_URL"),
        help="Base URL of the Lidarr instance. Can also be set via LIDARR_URL env var.",
    )
    parser.add_argument(
        "--api-key",
        default=os.getenv("LIDARR_API_KEY"),
        help="API key for Lidarr. Can also be set via LIDARR_API_KEY env var.",
    )
    parser.add_argument(
        "--no-audio-verify",
        action="store_true",
        help="Skip audio fingerprint verification (faster but less accurate)",
    )
    parser.add_argument(
        "--docker-mount",
        default=os.getenv("DOCKER_MOUNT"),
        help="Docker mount mapping in format 'container_path:host_path'. Can also be set via DOCKER_MOUNT env var.",
    )
    parser.add_argument(
        "--debug",
        action="store_true",
        help="Enable debug logging",
    )
    parser.add_argument(
        "--migrate-metadata",
        action="store_true",
        help="Migrate metadata (ratings, play counts) from singles to album tracks. Only applies to perfect matches (confidence >= 95).",
    )
    return parser


def _migrate_metadata(duplicates: list, docker_mount) -> None:
    """Migrate Plex metadata from singles to their eligible album tracks.

    Reads PLEX_URL and PLEX_TOKEN from the environment. If either is missing
    or the server connection fails, an error is logged and the function
    returns without migrating anything, so the caller can still report the
    duplicates it found.

    For every verified album entry whose ``migration_status`` is
    ``"eligible"``, the outcome is written back onto that entry under
    ``migration_message`` and ``migration_success``.

    Args:
        duplicates: Duplicate records returned by ``find_duplicate_singles``.
        docker_mount: Docker mount mapping string, or None; passed through to
            ``migrate_plex_metadata`` unchanged.
    """
    plex_url = os.getenv("PLEX_URL")
    plex_token = os.getenv("PLEX_TOKEN")
    if not (plex_url and plex_token):
        logger.error(
            "PLEX_URL and PLEX_TOKEN environment variables required for metadata migration"
        )
        logger.error("Set them in your .env file or environment")
        return

    logger.info(f"Connecting to Plex server at {plex_url}...")
    plex_server = get_plex_server(plex_url, plex_token)
    if not plex_server:
        logger.error(
            "Failed to connect to Plex server. Skipping metadata migration."
        )
        return

    logger.info("Migrating Plex metadata for perfect matches (confidence >= 95)...")
    migration_count = 0
    for dup in duplicates:
        for album_track in dup.get("verified_albums", []):
            if album_track.get("migration_status") != "eligible":
                continue
            logger.info(
                f"Migrating Plex metadata for '{dup['track_title']}' to album '{album_track['album_title']}'..."
            )
            success, message = migrate_plex_metadata(
                plex_server,
                album_track.get("single_file_path"),
                album_track.get("album_file_path"),
                docker_mount,
            )
            # Stored on the entry so the report can show the outcome later.
            album_track["migration_message"] = message
            album_track["migration_success"] = success
            if success:
                migration_count += 1
                logger.info(f"{message}")
            else:
                logger.warning(f"{message}")
    logger.info(f"Completed Plex metadata migration for {migration_count} track(s)")
    logger.info("")


def _report_duplicates(duplicates: list, artist_map: dict, verify_audio: bool) -> None:
    """Log a per-single report of the duplicates that were found.

    Args:
        duplicates: Duplicate records returned by ``find_duplicate_singles``.
        artist_map: Mapping of Lidarr artist id to artist name.
        verify_audio: Whether audio verification was enabled for the scan;
            controls whether a safe/not-safe verdict is printed.
    """
    verified_count = sum(1 for dup in duplicates if dup.get("verified_albums"))
    logger.info(
        f"Found {len(duplicates)} single track(s) that are duplicates of album tracks ({verified_count} verified by audio fingerprint):"
    )
    for dup in duplicates:
        artist_id = dup["artist_id"]
        artist_name = artist_map.get(artist_id, f"Unknown (ID: {artist_id})")
        duplicate_albums = dup["duplicate_albums"]
        # Falls back to the title-matched albums when the record carries no
        # "verified_albums" key.
        verified_albums = dup.get("verified_albums", duplicate_albums)
        confidence_scores = dup.get("confidence_scores", [])
        album_names = [album["album_title"] for album in duplicate_albums]

        logger.info(f"Artist: {artist_name}")
        logger.info(f" Single: '{dup['single_album_title']}'")
        logger.info(
            f" Track: '{dup['track_title']}' (trackFileId: {dup['single_track_file_id']})"
        )
        for result in dup.get("verification_results", []):
            logger.info(f" {result}")
        logger.info(
            f" Already present on {len(duplicate_albums)} album(s): {', '.join(album_names)}"
        )

        if verify_audio and not verified_albums:
            logger.info(" ⚠ NOT safe to delete (audio verification failed)")
        elif verify_audio:
            verified_names = [album["album_title"] for album in verified_albums]
            max_confidence = max(confidence_scores) if confidence_scores else 0
            logger.info(
                f" ✓ LIKELY safe to delete (audio verified on {len(verified_albums)} album(s): {', '.join(verified_names)})"
            )
            logger.info(f" Max confidence: {max_confidence}/100")
            # Only perfect matches (confidence >= 95) can carry a migration
            # message worth showing.
            for album_track in verified_albums:
                if album_track.get("confidence", 0) < 95:
                    continue
                migration_msg = album_track.get("migration_message", "")
                if migration_msg:
                    logger.info(f" Metadata: {migration_msg}")

        # NOTE(review): emitted for every entry regardless of verification
        # mode - confirm this matches the intended nesting.
        logger.info(
            " ⚠ CAUTION: Always check for different versions (remaster, radio edit, live, etc)"
        )
        logger.info("")


def main() -> None:
    """Command-line entry point: find singles duplicated on albums in Lidarr.

    Parses arguments, configures logging to stdout, fetches artists and
    albums from Lidarr, scans for duplicate singles (optionally with audio
    verification), optionally migrates Plex metadata, and logs a report.

    Exits with status 1 when the Lidarr base URL or API key is missing.
    Returns early (without error) when no artists, albums or duplicates are
    found. A failed or unconfigured Plex migration is logged and skipped; the
    duplicate report is still printed.
    """
    args = _build_parser().parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="[%(levelname)s] %(message)s",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    if not args.base_url:
        logger.error(
            "LIDARR_URL not set. Provide --base-url or set LIDARR_URL environment variable."
        )
        sys.exit(1)
    if not args.api_key:
        logger.error(
            "LIDARR_API_KEY not set. Provide --api-key or set LIDARR_API_KEY environment variable."
        )
        sys.exit(1)

    base_url = args.base_url.rstrip("/")
    headers = {"X-Api-Key": args.api_key}

    logger.info("Fetching artists...")
    artists = fetch_all_artists(base_url, headers)
    if not artists:
        logger.warning("No artists found. Exiting.")
        return

    artist_map = {
        artist.get("id"): artist.get("artistName", "Unknown")
        for artist in artists
        if artist.get("id")
    }

    logger.info("Fetching albums for each artist...")
    albums = [
        album
        for artist in artists
        if artist.get("id")
        for album in fetch_albums_for_artist(base_url, headers, artist["id"])
    ]
    if not albums:
        logger.warning("No albums found in the library.")
        return

    logger.info("Building album track map...")
    album_track_map = build_album_track_map(base_url, headers, albums)

    verify_audio = not args.no_audio_verify
    # Normalise an empty string (e.g. DOCKER_MOUNT="") to None.
    docker_mount = args.docker_mount or None

    if not verify_audio:
        logger.info(
            "Scanning for duplicate singles (audio verification disabled - using title matching only)..."
        )
    else:
        mount_msg = f" (Docker mount: {docker_mount})" if docker_mount else ""
        logger.info(
            f"Scanning for duplicate singles with audio verification{mount_msg}..."
        )
        logger.info(
            "NOTE: Audio verification requires 'fpcalc' (chromaprint) to be installed"
        )

    duplicates = find_duplicate_singles(
        base_url,
        headers,
        albums,
        album_track_map,
        verify_audio=verify_audio,
        docker_mount=docker_mount,
    )
    if not duplicates:
        logger.info("No duplicate singles found. The library appears clean.")
        return

    if args.migrate_metadata:
        # Best effort: a Plex failure must not discard the scan results.
        _migrate_metadata(duplicates, docker_mount)

    _report_duplicates(duplicates, artist_map, verify_audio)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()