Add audio verification and duplicate tracking features

- Integrated `plexapi` and `python-dotenv` as dependencies in `flake.nix` and `pyproject.toml` for enhanced functionality.
- Implemented new modules for audio verification and duplicate tracking, including `audio_verification.py`, `duplicate_finder.py`, and `track_verification.py`.
- Updated `main.py` to utilize the new modules for identifying and managing duplicate single tracks in Lidarr, with detailed logging and confidence scoring.
- Enhanced the `find_duplicate_singles` function to support audio verification results and metadata migration to Plex.
- Refactored existing code for improved structure and maintainability, ensuring better integration of new features.
2025-11-14 01:32:41 -06:00
parent 03e8eb6f4e
commit af5a2bf825
10 changed files with 1090 additions and 680 deletions
--- a/src-cleanup/duplicate_finder.py
+++ b/src-cleanup/duplicate_finder.py
@@ -0,0 +1,267 @@
+"""Functions to find duplicate singles in Lidarr"""
+
+import logging
+from collections import defaultdict
+from typing import Dict, List, Optional, Tuple
+
+from lidarr_client import fetch_tracks_for_album, get_trackfile_info
+from track_verification import verify_audio_match
+
+logger = logging.getLogger(__name__)
+
+
+def normalize_title(title: str) -> str:
+    """Return ``title`` lowercased with whitespace runs collapsed to one space."""
+    lowered = title.lower()
+    # split() with no argument drops leading/trailing whitespace and
+    # treats any run of whitespace as a single separator.
+    words = lowered.split()
+    return " ".join(words)
+
+
+def build_album_track_map(
+    base_url: str, headers: Dict[str, str], albums: List[Dict]
+) -> Dict[Tuple[int, str], List[Dict]]:
+    """Create a mapping of tracks present on full albums.
+
+    Only entries of ``albums`` whose ``albumType`` equals ``"album"``
+    (case-insensitive) are considered. For each of them the tracks are
+    fetched with ``fetch_tracks_for_album`` and every track that has a file
+    is recorded.
+
+    Args:
+        base_url: Passed through to ``fetch_tracks_for_album``.
+        headers: Passed through to ``fetch_tracks_for_album``.
+        albums: Album dicts; the keys read here are ``albumType``, ``id``,
+            ``artistId`` and ``title``.
+
+    Returns:
+        A ``defaultdict(list)`` keyed by ``(artist_id, normalized_title)``.
+        Each value is a list of dicts with the keys ``album_id``,
+        ``album_title``, ``track_id`` and ``track_file_id``.
+    """
+    album_track_map: Dict[Tuple[int, str], List[Dict]] = defaultdict(list)
+
+    for album in albums:
+        # ``or ""`` keeps ``.lower()`` safe when the key is present but None;
+        # ``.get("albumType", "")`` alone would raise AttributeError there.
+        if (album.get("albumType") or "").lower() != "album":
+            continue
+
+        album_id = album.get("id")
+        artist_id = album.get("artistId")
+        album_title = album.get("title", "Unknown")
+        if not (album_id and artist_id):
+            continue
+
+        tracks = fetch_tracks_for_album(base_url, headers, album_id)
+        if not tracks:
+            logger.debug(
+                f"Skipping album '{album_title}' (albumId: {album_id}) - could not fetch tracks"
+            )
+            continue
+
+        for track in tracks:
+            if not track.get("hasFile"):
+                continue
+
+            title = track.get("title")
+            track_id = track.get("id")
+            track_file_id = track.get("trackFileId")
+            # A track without a title or without ids cannot be matched later.
+            if not (title and track_file_id and track_id):
+                continue
+
+            key = (artist_id, normalize_title(title))
+            album_track_map[key].append(
+                {
+                    "album_id": album_id,
+                    "album_title": album_title,
+                    "track_id": track_id,
+                    "track_file_id": track_file_id,
+                }
+            )
+
+    return album_track_map
+
+
+def create_unverified_duplicate(
+    artist_id, album_id, album_title, title, track_file_id, duplicate_albums
+) -> Dict:
+    """Build the duplicate record used when audio verification is skipped.
+
+    Every candidate in ``duplicate_albums`` is reported as verified (the
+    same list object is stored under both ``duplicate_albums`` and
+    ``verified_albums``), together with one placeholder result message and
+    one confidence score of 0.
+    """
+    entry: Dict = dict(
+        artist_id=artist_id,
+        single_album_id=album_id,
+        single_album_title=album_title,
+        track_title=title,
+        single_track_file_id=track_file_id,
+    )
+    # Nothing was checked, so the candidates are passed through unchanged.
+    entry["duplicate_albums"] = duplicate_albums
+    entry["verified_albums"] = duplicate_albums
+    entry["verification_results"] = ["Audio verification disabled"]
+    entry["confidence_scores"] = [0]
+    return entry
+
+
+def verify_and_mark_album_track(
+    base_url,
+    headers,
+    track_id,
+    track_file_id,
+    album_track,
+    docker_mount,
+    single_file_path,
+) -> Tuple[bool, Optional[Dict], str, int]:
+    """Verify one album track against a single and annotate a confirmed match.
+
+    Args:
+        base_url: Passed through to ``verify_audio_match`` and
+            ``get_trackfile_info``.
+        headers: Passed through to the same helpers.
+        track_id: Track id of the single.
+        track_file_id: Track file id of the single.
+        album_track: Candidate album track dict; ``track_id``,
+            ``track_file_id`` and ``album_title`` are read. It is not
+            modified.
+        docker_mount: Passed through to ``verify_audio_match``.
+        single_file_path: Path of the single's file, or a falsy value when
+            it is unknown.
+
+    Returns:
+        ``(match, annotated_track, result_message, confidence)``. On a
+        mismatch ``annotated_track`` is ``None``. On a match it is a shallow
+        copy of ``album_track`` with ``confidence`` and ``migration_status``
+        added. The status is ``"eligible"`` only when ``confidence >= 95``
+        and both file paths are known; in that case ``single_file_path`` and
+        ``album_file_path`` are added as well.
+    """
+    album_track_id = album_track["track_id"]
+    album_track_file_id = album_track["track_file_id"]
+
+    match, result_message, confidence = verify_audio_match(
+        base_url,
+        headers,
+        track_id,
+        track_file_id,
+        album_track_id,
+        album_track_file_id,
+        docker_mount,
+    )
+
+    if not match:
+        logger.debug(
+            f"Audio mismatch: single trackFileId {track_file_id} does not match album '{album_track['album_title']}' trackFileId {album_track_file_id} (confidence: {confidence}/100)"
+        )
+        return False, None, result_message, confidence
+
+    # The album file path only matters for a confirmed match, so the lookup
+    # is skipped entirely on a mismatch.
+    album_track_file_info = get_trackfile_info(base_url, album_track_file_id, headers)
+    album_file_path = (
+        album_track_file_info.get("path") if album_track_file_info else None
+    )
+
+    # Annotate a copy: the incoming dict is shared via the album track map
+    # and may be verified against several singles. Writing into it would
+    # overwrite the confidence/status/paths reported for an earlier single.
+    marked_track = dict(album_track)
+    marked_track["confidence"] = confidence
+    marked_track["migration_status"] = (
+        "eligible"
+        if confidence >= 95 and single_file_path and album_file_path
+        else "not_eligible"
+    )
+    if marked_track["migration_status"] == "eligible":
+        marked_track["single_file_path"] = single_file_path
+        marked_track["album_file_path"] = album_file_path
+
+    logger.debug(
+        f"Audio match confirmed: single trackFileId {track_file_id} matches album '{album_track['album_title']}' trackFileId {album_track_file_id} (confidence: {confidence}/100)"
+    )
+    return True, marked_track, result_message, confidence
+
+
+def process_single_track(
+    base_url,
+    headers,
+    album_id,
+    artist_id,
+    album_title,
+    track,
+    album_track_map,
+    verify_audio,
+    docker_mount,
+) -> Optional[Dict]:
+    """Return duplicate info for one track of a single, or None.
+
+    None is returned when the track lacks a title, id or track file id, or
+    when no album track with the same artist and normalized title is in
+    ``album_track_map``. With ``verify_audio`` false the candidates are
+    reported unverified; otherwise each candidate is checked with
+    ``verify_and_mark_album_track`` and the per-candidate messages and
+    confidence scores are collected in candidate order.
+    """
+    track_file_id = track.get("trackFileId")
+    track_id = track.get("id")
+    title = track.get("title")
+    if not title or not track_file_id or not track_id:
+        return None
+
+    # A missing key and an empty candidate list are handled alike.
+    duplicate_albums = album_track_map.get((artist_id, normalize_title(title)))
+    if not duplicate_albums:
+        return None
+
+    if not verify_audio:
+        return create_unverified_duplicate(
+            artist_id, album_id, album_title, title, track_file_id, duplicate_albums
+        )
+
+    logger.debug(
+        f"Verifying audio for '{title}' from single '{album_title}' against {len(duplicate_albums)} album track(s)..."
+    )
+
+    single_info = get_trackfile_info(base_url, track_file_id, headers)
+    single_file_path = single_info.get("path") if single_info else None
+
+    verified_albums = []
+    verification_results = []
+    confidence_scores = []
+    for candidate in duplicate_albums:
+        matched, marked, message, score = verify_and_mark_album_track(
+            base_url,
+            headers,
+            track_id,
+            track_file_id,
+            candidate,
+            docker_mount,
+            single_file_path,
+        )
+        if matched and marked:
+            verified_albums.append(marked)
+        # Messages and scores are kept for every candidate, matched or not.
+        verification_results.append(message)
+        confidence_scores.append(score)
+
+    return {
+        "artist_id": artist_id,
+        "single_album_id": album_id,
+        "single_album_title": album_title,
+        "track_title": title,
+        "single_track_file_id": track_file_id,
+        "duplicate_albums": duplicate_albums,
+        "verified_albums": verified_albums,
+        "verification_results": verification_results,
+        "confidence_scores": confidence_scores,
+    }
+
+
+def process_single_album(
+    base_url, headers, album, album_track_map, verify_audio, docker_mount
+) -> List[Dict]:
+    """Collect the duplicate records for every track of one single release.
+
+    An empty list is returned when the album has no ``id`` or ``artistId``
+    or when its tracks could not be fetched. Only tracks whose ``hasFile``
+    is truthy are examined.
+    """
+    album_id = album.get("id")
+    artist_id = album.get("artistId")
+    album_title = album.get("title", "")
+    if not album_id or not artist_id:
+        return []
+
+    tracks = fetch_tracks_for_album(base_url, headers, album_id)
+    if not tracks:
+        logger.debug(
+            f"Skipping single album '{album_title}' (albumId: {album_id}) - could not fetch tracks"
+        )
+        return []
+
+    duplicates: List[Dict] = []
+    for track in tracks:
+        if not track.get("hasFile"):
+            continue
+        info = process_single_track(
+            base_url,
+            headers,
+            album_id,
+            artist_id,
+            album_title,
+            track,
+            album_track_map,
+            verify_audio,
+            docker_mount,
+        )
+        # None means the track duplicates nothing on a full album.
+        if info is not None:
+            duplicates.append(info)
+    return duplicates
+
+
+def find_duplicate_singles(
+    base_url: str,
+    headers: Dict[str, str],
+    albums: List[Dict],
+    album_track_map: Dict[Tuple[int, str], List[Dict]],
+    verify_audio: bool = True,
+    docker_mount: Optional[str] = None,
+) -> List[Dict]:
+    """Identify single tracks that duplicate album tracks.
+
+    Args:
+        base_url: Passed through to ``process_single_album``.
+        headers: Passed through to ``process_single_album``.
+        albums: Album dicts; only those whose ``albumType`` equals
+            ``"single"`` (case-insensitive) are processed.
+        album_track_map: Mapping of ``(artist_id, normalized_title)`` to the
+            album tracks carrying that title.
+        verify_audio: Passed through to ``process_single_album``.
+        docker_mount: Passed through to ``process_single_album``.
+
+    Returns:
+        The duplicate records of all processed singles, concatenated in the
+        order the singles appear in ``albums``.
+    """
+    duplicates: List[Dict] = []
+    for album in albums:
+        # ``or ""`` keeps ``.lower()`` safe when the key is present but None;
+        # ``.get("albumType", "")`` alone would raise AttributeError there.
+        if (album.get("albumType") or "").lower() != "single":
+            continue
+        duplicates.extend(
+            process_single_album(
+                base_url, headers, album, album_track_map, verify_audio, docker_mount
+            )
+        )
+    return duplicates