From 026c7fe0d86d78731e04efa06f925f24f8a725b5 Mon Sep 17 00:00:00 2001 From: Danilo Reyes Date: Thu, 13 Nov 2025 21:46:03 -0600 Subject: [PATCH] Add lidarr-cleanup-singles package and integration - Introduced a new package `lidarr-cleanup-singles` to identify and manage duplicate single tracks in Lidarr. - Updated `flake.nix` to include the new package in outputs and modified app definitions to support it. - Created a new script in `src-cleanup` for the main functionality, including audio fingerprint verification. - Added necessary dependencies and configuration in `pyproject.toml` for the new package. - Removed unused `flake-utils` and `systems` entries from `flake.lock` to streamline the configuration. --- flake.lock | 34 --- flake.nix | 11 +- nix/package-cleanup.nix | 23 ++ src-cleanup/__init__.py | 2 + src-cleanup/main.py | 505 +++++++++++++++++++++++++++++++++++++ src-cleanup/pyproject.toml | 19 ++ 6 files changed, 559 insertions(+), 35 deletions(-) create mode 100644 nix/package-cleanup.nix create mode 100644 src-cleanup/__init__.py create mode 100644 src-cleanup/main.py create mode 100644 src-cleanup/pyproject.toml diff --git a/flake.lock b/flake.lock index 07d5f28..a090b59 100644 --- a/flake.lock +++ b/flake.lock @@ -1,23 +1,5 @@ { "nodes": { - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1731533236, - "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, "nixpkgs": { "locked": { "lastModified": 1762596750, @@ -36,24 +18,8 @@ }, "root": { "inputs": { - "flake-utils": "flake-utils", "nixpkgs": "nixpkgs" } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": 
"da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } } }, "root": "root", diff --git a/flake.nix b/flake.nix index abb6a81..d0448b2 100644 --- a/flake.nix +++ b/flake.nix @@ -13,13 +13,17 @@ inherit pkgs; src = lib.cleanSource ./src; }; + lidarr-cleanup-singles = import ./nix/package-cleanup.nix { + inherit pkgs; + src = lib.cleanSource ./src-cleanup; + }; in { nixosModules.lidarr-mb-gap = import ./nixos/lidarr-mb-gap.nix; packages.${system} = { default = lidarr-mb-gap; - inherit lidarr-mb-gap; + inherit lidarr-mb-gap lidarr-cleanup-singles; }; apps.${system} = { @@ -31,6 +35,10 @@ type = "app"; program = "${lidarr-mb-gap}/bin/lidarr-mb-gap"; }; + lidarr-cleanup-singles = { + type = "app"; + program = "${lidarr-cleanup-singles}/bin/lidarr-cleanup-singles"; + }; }; devShells.${system} = { @@ -43,6 +51,7 @@ ] )) pkgs.black + pkgs.chromaprint ]; shellHook = '' echo "Python environment ready!" 
# nix/package-cleanup.nix — build the lidarr-cleanup-singles Python application.
#
# NOTE(review): this span of the original patch also carried the trivial
# src-cleanup/__init__.py stub (a two-line docstring that the py-modules-based
# packaging never installs) and the shebang/docstring head of
# src-cleanup/main.py.
{ pkgs, src }:

pkgs.python3Packages.buildPythonApplication {
  pname = "lidarr-cleanup-singles";
  version = "1.0.0";
  inherit src;
  format = "pyproject";

  nativeBuildInputs = [
    pkgs.python3Packages.setuptools
    # Needed for wrapProgram below.
    pkgs.makeWrapper
  ];

  propagatedBuildInputs = with pkgs.python3Packages; [
    requests
    python-dotenv
  ];

  buildInputs = [
    pkgs.chromaprint
  ];

  # Fix: chromaprint in buildInputs is a build-time closure only — it does not
  # end up on the installed program's runtime PATH, so the script's
  # `subprocess.run(["fpcalc", ...])` lookup would fail when launched from this
  # package.  Wrap the entry point so fpcalc is always resolvable.
  postFixup = ''
    wrapProgram $out/bin/lidarr-cleanup-singles \
      --prefix PATH : ${pkgs.lib.makeBinPath [ pkgs.chromaprint ]}
  '';

  meta = {
    mainProgram = "lidarr-cleanup-singles";
    description = "Identify duplicate single tracks in Lidarr";
  };
}
+""" + +import argparse +import logging +import os +import subprocess +import sys +from collections import defaultdict +from typing import Dict, List, Optional, Tuple + +import requests +from dotenv import load_dotenv + +load_dotenv() + +logger = logging.getLogger(__name__) + + +def normalize_title(title: str) -> str: + """Normalize a track title for comparison""" + return " ".join(title.lower().split()) + + +def get_json( + url: str, headers: Dict[str, str], params: Optional[Dict[str, object]] = None +) -> List[Dict]: + """Wrapper around requests.get with basic error handling""" + try: + resp = requests.get(url, headers=headers, params=params, timeout=60) + resp.raise_for_status() + return resp.json() + except requests.exceptions.RequestException as e: + logger.error(f"Error fetching {url}: {e}") + raise + + +def get_trackfile_info( + base_url: str, track_file_id: int, headers: Dict[str, str] +) -> Optional[Dict]: + """Get trackfile information including file path""" + try: + resp = requests.get( + f"{base_url.rstrip('/')}/api/v1/trackfile/{track_file_id}", + headers=headers, + timeout=30, + ) + resp.raise_for_status() + return resp.json() + except requests.exceptions.RequestException as e: + logger.warning(f"Could not fetch trackfile {track_file_id}: {e}") + return None + + +def map_docker_path(file_path: str, docker_mount: Optional[str] = None) -> str: + """Map Docker container path to host path""" + if not docker_mount: + return file_path + + container_path, host_path = docker_mount.split(":", 1) + if file_path.startswith(container_path): + mapped_path = file_path.replace(container_path, host_path, 1) + return mapped_path + return file_path + + +def get_audio_fingerprint( + file_path: str, docker_mount: Optional[str] = None +) -> Optional[str]: + """Generate audio fingerprint using fpcalc (chromaprint)""" + mapped_path = map_docker_path(file_path, docker_mount) + logger.info(f"Generating fingerprint for: {mapped_path} (original: {file_path})") + + if not 
os.path.exists(mapped_path): + logger.warning(f"File not found: {mapped_path} (original: {file_path})") + return None + + try: + logger.debug(f"Running fpcalc on: {mapped_path}") + result = subprocess.run( + ["fpcalc", "-raw", mapped_path], + capture_output=True, + text=True, + timeout=60, + check=False, + ) + if result.returncode != 0: + logger.warning(f"fpcalc failed for {mapped_path}: {result.stderr}") + return None + + for line in result.stdout.split("\n"): + if line.startswith("FINGERPRINT="): + fingerprint = line.split("=", 1)[1] + logger.info( + f"Successfully generated fingerprint for {mapped_path} (length: {len(fingerprint)})" + ) + return fingerprint + logger.warning(f"fpcalc output did not contain FINGERPRINT= for {mapped_path}") + return None + except subprocess.TimeoutExpired: + logger.warning(f"fpcalc timeout for {mapped_path}") + return None + except FileNotFoundError: + logger.warning( + "fpcalc not found. Install chromaprint to enable audio verification." + ) + return None + except Exception as e: + logger.warning(f"Error generating fingerprint for {mapped_path}: {e}") + return None + + +def compare_fingerprints( + fp1: Optional[str], fp2: Optional[str], log_context: Optional[str] = None +) -> bool: + """Compare two audio fingerprints for similarity""" + if not fp1 or not fp2: + context_msg = f" ({log_context})" if log_context else "" + logger.debug(f"Fingerprint comparison failed: missing fingerprint{context_msg}") + return False + + if fp1 == fp2: + context_msg = f" ({log_context})" if log_context else "" + logger.info(f"Fingerprint comparison: exact match{context_msg}") + return True + + def decode_fingerprint(fp: str) -> List[int]: + return [int(x) for x in fp.split(",") if x.strip()] + + try: + f1 = decode_fingerprint(fp1) + f2 = decode_fingerprint(fp2) + + if not f1 or not f2: + context_msg = f" ({log_context})" if log_context else "" + logger.debug( + f"Fingerprint comparison failed: empty decoded fingerprint{context_msg}" + ) + return False 
+ + min_len = min(len(f1), len(f2)) + if min_len == 0: + context_msg = f" ({log_context})" if log_context else "" + logger.debug(f"Fingerprint comparison failed: zero length{context_msg}") + return False + + matches = sum(1 for i in range(min_len) if f1[i] == f2[i]) + similarity = matches / min_len + match = similarity >= 0.95 + + context_msg = f" ({log_context})" if log_context else "" + logger.info( + f"Fingerprint comparison: similarity={similarity:.3f}, match={match}{context_msg}" + ) + return match + except (ValueError, ZeroDivisionError) as e: + context_msg = f" ({log_context})" if log_context else "" + logger.debug( + f"Fingerprint comparison failed: exception {type(e).__name__}{context_msg}" + ) + return False + + +# DELETE FUNCTIONALITY COMMENTED OUT FOR SAFETY +# def delete_track_file(base_url: str, track_file_id: int, headers: Dict[str, str]) -> None: +# """Delete a track file by ID""" +# delete_url = f"{base_url.rstrip('/')}/api/v1/trackfile/{track_file_id}" +# resp = requests.delete(delete_url, headers=headers, timeout=60) +# resp.raise_for_status() + + +def build_album_track_map( + base_url: str, headers: Dict[str, str], albums: List[Dict] +) -> Dict[Tuple[int, str], List[Dict]]: + """Create a mapping of tracks present on full albums""" + album_track_map: Dict[Tuple[int, str], List[Dict]] = defaultdict(list) + + album_albums = list( + filter(lambda album: album.get("albumType", "").lower() == "album", albums) + ) + + for album in album_albums: + album_id = album.get("id") + artist_id = album.get("artistId") + album_title = album.get("title", "Unknown") + if not album_id or not artist_id: + continue + + tracks = get_json( + f"{base_url.rstrip('/')}/api/v1/track", + headers, + params={"albumId": album_id}, + ) + + tracks_with_files = filter(lambda track: track.get("hasFile"), tracks) + + for track in tracks_with_files: + title = track.get("title") + track_file_id = track.get("trackFileId") + if not title or not track_file_id: + continue + key = 
(artist_id, normalize_title(title)) + album_track_map[key].append( + { + "album_id": album_id, + "album_title": album_title, + "track_file_id": track_file_id, + } + ) + + return album_track_map + + +def verify_audio_match( + base_url: str, + headers: Dict[str, str], + single_track_file_id: int, + album_track_file_id: int, + docker_mount: Optional[str] = None, +) -> bool: + """Verify that two track files contain the same audio""" + logger.info( + f"Verifying audio match: single trackFileId {single_track_file_id} vs album trackFileId {album_track_file_id}" + ) + single_file_info = get_trackfile_info(base_url, single_track_file_id, headers) + album_file_info = get_trackfile_info(base_url, album_track_file_id, headers) + + if not single_file_info or not album_file_info: + logger.debug( + f"Could not fetch track file info: single={single_file_info is not None}, album={album_file_info is not None}" + ) + return False + + single_path = single_file_info.get("path") + album_path = album_file_info.get("path") + + if not single_path or not album_path: + logger.debug( + f"Missing file paths: single_path={single_path is not None}, album_path={album_path is not None}" + ) + return False + + logger.info(f"Fetching fingerprints: single={single_path}, album={album_path}") + single_fp = get_audio_fingerprint(single_path, docker_mount) + album_fp = get_audio_fingerprint(album_path, docker_mount) + + log_context = f"single trackFileId {single_track_file_id} vs album trackFileId {album_track_file_id}" + return compare_fingerprints(single_fp, album_fp, log_context) + + +def find_duplicate_singles( + base_url: str, + headers: Dict[str, str], + albums: List[Dict], + album_track_map: Dict[Tuple[int, str], List[Dict]], + verify_audio: bool = True, + docker_mount: Optional[str] = None, +) -> List[Dict]: + """Identify single tracks that duplicate album tracks""" + duplicates: List[Dict] = [] + + single_albums = list( + filter(lambda album: album.get("albumType", "").lower() == "single", 
albums) + ) + + for album in single_albums: + album_id = album.get("id") + artist_id = album.get("artistId") + album_title = album.get("title", "") + if not album_id or not artist_id: + continue + + tracks = get_json( + f"{base_url.rstrip('/')}/api/v1/track", + headers, + params={"albumId": album_id}, + ) + + tracks_with_files = filter(lambda track: track.get("hasFile"), tracks) + + for track in tracks_with_files: + title = track.get("title") + track_file_id = track.get("trackFileId") + if not title or not track_file_id: + continue + + key = (artist_id, normalize_title(title)) + if key not in album_track_map: + continue + + duplicate_albums = album_track_map[key] + verified_albums = [] + + if verify_audio: + logger.info( + f"Verifying audio for '{title}' from single '{album_title}' against {len(duplicate_albums)} album track(s)..." + ) + for album_track in duplicate_albums: + album_track_file_id = album_track["track_file_id"] + if verify_audio_match( + base_url, + headers, + track_file_id, + album_track_file_id, + docker_mount, + ): + verified_albums.append(album_track) + logger.debug( + f"Audio match confirmed: single trackFileId {track_file_id} matches album '{album_track['album_title']}' trackFileId {album_track_file_id}" + ) + else: + logger.debug( + f"Audio mismatch: single trackFileId {track_file_id} does not match album '{album_track['album_title']}' trackFileId {album_track_file_id}" + ) + else: + verified_albums = duplicate_albums + + if verified_albums: + duplicates.append( + { + "artist_id": artist_id, + "single_album_id": album_id, + "single_album_title": album_title, + "track_title": title, + "single_track_file_id": track_file_id, + "duplicate_albums": verified_albums, + } + ) + + return duplicates + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Identify single tracks that are duplicates of album tracks in Lidarr." 
+ ) + parser.add_argument( + "--base-url", + default=os.getenv("LIDARR_URL"), + help="Base URL of the Lidarr instance (e.g. https://music.example.org). Can also be set via LIDARR_URL env var.", + ) + parser.add_argument( + "--api-key", + default=os.getenv("LIDARR_API_KEY"), + help="API key for Lidarr with sufficient permissions. Can also be set via LIDARR_API_KEY env var.", + ) + parser.add_argument( + "--no-audio-verify", + action="store_true", + help="Skip audio fingerprint verification (faster but less accurate)", + ) + parser.add_argument( + "--docker-mount", + default=os.getenv("DOCKER_MOUNT"), + help="Docker mount mapping in format 'container_path:host_path' (e.g. '/music:/srv/pool/multimedia/media/Music'). Can also be set via DOCKER_MOUNT env var.", + ) + parser.add_argument( + "--debug", + action="store_true", + help="Enable debug logging", + ) + # DELETE FLAG COMMENTED OUT FOR SAFETY + # parser.add_argument( + # "--delete", + # action="store_true", + # help="If set, delete the duplicate single track files instead of just listing them", + # ) + # parser.add_argument( + # "--force", + # action="store_true", + # help="If set together with --delete, do not prompt for confirmation before deletion", + # ) + args = parser.parse_args() + + logging.basicConfig( + level=logging.DEBUG if args.debug else logging.INFO, + format="[%(levelname)s] %(message)s", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + if not args.base_url: + logger.error( + "LIDARR_URL not set. Provide --base-url or set LIDARR_URL environment variable." + ) + sys.exit(1) + + if not args.api_key: + logger.error( + "LIDARR_API_KEY not set. Provide --api-key or set LIDARR_API_KEY environment variable." + ) + sys.exit(1) + + base_url = args.base_url.rstrip("/") + headers = {"X-Api-Key": args.api_key} + + logger.info("Fetching artists...") + artists = get_json(f"{base_url}/api/v1/artist", headers) + if not artists: + logger.warning("No artists found. 
Exiting.") + return + + artist_map = { + artist.get("id"): artist.get("artistName", "Unknown") + for artist in artists + if artist.get("id") + } + + logger.info("Fetching albums for each artist...") + albums: List[Dict] = [] + for artist in artists: + artist_id = artist.get("id") + if not artist_id: + continue + artist_albums = get_json( + f"{base_url}/api/v1/album", + headers, + params={"artistId": artist_id}, + ) + albums.extend(artist_albums) + + if not albums: + logger.warning("No albums found in the library.") + return + + logger.info("Building album track map...") + album_track_map = build_album_track_map(base_url, headers, albums) + + verify_audio = not args.no_audio_verify + docker_mount = args.docker_mount if args.docker_mount else None + + if verify_audio: + if docker_mount: + logger.info( + f"Scanning for duplicate singles with audio verification (Docker mount: {docker_mount})..." + ) + else: + logger.info("Scanning for duplicate singles with audio verification...") + logger.info( + "NOTE: Audio verification requires 'fpcalc' (chromaprint) to be installed" + ) + else: + logger.info( + "Scanning for duplicate singles (audio verification disabled - using title matching only)..." + ) + + duplicates = find_duplicate_singles( + base_url, + headers, + albums, + album_track_map, + verify_audio=verify_audio, + docker_mount=docker_mount, + ) + + if not duplicates: + logger.info("No duplicate singles found. 
The library appears clean.") + return + + logger.info( + f"Found {len(duplicates)} single track(s) that are duplicates of album tracks:" + ) + for dup in duplicates: + artist_id = dup["artist_id"] + artist_name = artist_map.get(artist_id, f"Unknown (ID: {artist_id})") + duplicate_albums = dup["duplicate_albums"] + album_names = [album["album_title"] for album in duplicate_albums] + + logger.info(f"Artist: {artist_name}") + logger.info(f" Single: '{dup['single_album_title']}'") + logger.info( + f" Track: '{dup['track_title']}' (trackFileId: {dup['single_track_file_id']})" + ) + logger.info( + f" Already present on {len(duplicate_albums)} album(s): {', '.join(album_names)}" + ) + logger.info("") + + # DELETE FUNCTIONALITY COMMENTED OUT FOR SAFETY + # if args.delete: + # if not args.force: + # confirm = input( + # f"\nAre you sure you want to delete these {len(duplicates)} single track file(s)? [y/N]: " + # ).strip().lower() + # if confirm not in ("y", "yes"): + # logger.info("Aborting deletion.") + # return + # logger.info("Deleting duplicate single track files...") + # for dup in duplicates: + # track_file_id = dup["single_track_file_id"] + # try: + # delete_track_file(base_url, track_file_id, headers) + # logger.info( + # f"Deleted trackFileId {track_file_id} (track '{dup['track_title']}' from single '{dup['single_album_title']}')." 
+ # ) + # except Exception as exc: + # logger.error(f"Failed to delete trackFileId {track_file_id}: {exc}") + + +if __name__ == "__main__": + main() diff --git a/src-cleanup/pyproject.toml b/src-cleanup/pyproject.toml new file mode 100644 index 0000000..883b03e --- /dev/null +++ b/src-cleanup/pyproject.toml @@ -0,0 +1,19 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[tool.setuptools] +py-modules = ["main"] + +[project] +name = "lidarr-cleanup-singles" +version = "1.0.0" +description = "Identify and optionally delete duplicate single tracks in Lidarr" +requires-python = ">=3.8" +dependencies = [ + "requests", +] + +[project.scripts] +lidarr-cleanup-singles = "main:main" +