Files
lidarr-mb-gap/src-cleanup/duplicate_finder.py
Danilo Reyes af5a2bf825 Add audio verification and duplicate tracking features
- Integrated `plexapi` and `python-dotenv` as dependencies in `flake.nix` and `pyproject.toml` for enhanced functionality.
- Implemented new modules for audio verification and duplicate tracking, including `audio_verification.py`, `duplicate_finder.py`, and `track_verification.py`.
- Updated `main.py` to utilize the new modules for identifying and managing duplicate single tracks in Lidarr, with detailed logging and confidence scoring.
- Enhanced the `find_duplicate_singles` function to support audio verification results and metadata migration to Plex.
- Refactored existing code for improved structure and maintainability, ensuring better integration of new features.
2025-11-14 01:32:41 -06:00

268 lines
8.2 KiB
Python

"""Functions to find duplicate singles in Lidarr"""
import logging
from collections import defaultdict
from typing import Dict, List, Optional, Tuple
from lidarr_client import fetch_tracks_for_album, get_trackfile_info
from track_verification import verify_audio_match
logger = logging.getLogger(__name__)
def normalize_title(title: str) -> str:
    """Normalize a track title for comparison.

    The title is case-folded and every run of whitespace (including leading
    and trailing whitespace) is collapsed to a single space.

    ``str.casefold`` is used rather than ``str.lower`` because it is the
    caseless-matching primitive: titles that differ only in case compare equal
    even for characters whose lowercase form is not unique (e.g. German
    "ß" vs "SS").

    Args:
        title: Raw track title.

    Returns:
        The normalized title, intended for use as a comparison key.
    """
    return " ".join(title.casefold().split())
def build_album_track_map(
    base_url: str, headers: Dict[str, str], albums: List[Dict]
) -> Dict[Tuple[int, str], List[Dict]]:
    """Create a mapping of tracks present on full albums.

    Only entries of ``albums`` whose ``albumType`` equals "album"
    (case-insensitively) are considered. For each of them the tracks are
    fetched with ``fetch_tracks_for_album`` and every track that reports
    ``hasFile`` and carries a title, an id and a trackFileId is recorded.

    Args:
        base_url: Passed through to ``fetch_tracks_for_album``.
        headers: Passed through to ``fetch_tracks_for_album``.
        albums: Album dicts; the keys read are ``albumType``, ``id``,
            ``artistId`` and ``title``.

    Returns:
        A ``defaultdict(list)`` keyed by ``(artist_id, normalized_title)``.
        Each value is a list of dicts with the keys ``album_id``,
        ``album_title``, ``track_id`` and ``track_file_id``. Because the
        result is a defaultdict, indexing a missing key inserts an empty
        list; use ``in`` or ``.get`` for lookups.
    """
    album_track_map: Dict[Tuple[int, str], List[Dict]] = defaultdict(list)

    for album in albums:
        # ``albumType`` may be missing or explicitly null; both mean "not an
        # album" instead of raising AttributeError on ``None.lower()``.
        if (album.get("albumType") or "").lower() != "album":
            continue

        album_id = album.get("id")
        artist_id = album.get("artistId")
        album_title = album.get("title", "Unknown")
        if not (album_id and artist_id):
            continue

        tracks = fetch_tracks_for_album(base_url, headers, album_id)
        if not tracks:
            logger.debug(
                f"Skipping album '{album_title}' (albumId: {album_id}) - could not fetch tracks"
            )
            continue

        for track in tracks:
            if not track.get("hasFile"):
                continue
            title = track.get("title")
            track_id = track.get("id")
            track_file_id = track.get("trackFileId")
            if not (title and track_file_id and track_id):
                continue
            album_track_map[(artist_id, normalize_title(title))].append(
                {
                    "album_id": album_id,
                    "album_title": album_title,
                    "track_id": track_id,
                    "track_file_id": track_file_id,
                }
            )

    return album_track_map
def create_unverified_duplicate(
    artist_id, album_id, album_title, title, track_file_id, duplicate_albums
) -> Dict:
    """Create duplicate entry for unverified tracks.

    Used when audio verification is switched off: every candidate album track
    is reported as verified, with a single placeholder verification message
    and a single confidence score of 0.

    Returns:
        A dict describing the single and its candidate album tracks. The
        ``duplicate_albums`` and ``verified_albums`` keys both reference the
        very list object that was passed in (no copy is made).
    """
    entry = dict(
        artist_id=artist_id,
        single_album_id=album_id,
        single_album_title=album_title,
        track_title=title,
        single_track_file_id=track_file_id,
    )
    # Nothing was checked, so the candidate list doubles as the verified list.
    entry["duplicate_albums"] = duplicate_albums
    entry["verified_albums"] = duplicate_albums
    entry["verification_results"] = ["Audio verification disabled"]
    entry["confidence_scores"] = [0]
    return entry
def verify_and_mark_album_track(
    base_url,
    headers,
    track_id,
    track_file_id,
    album_track,
    docker_mount,
    single_file_path,
) -> Tuple[bool, Optional[Dict], str, int]:
    """Verify album track and mark for migration if perfect match.

    Delegates the comparison to ``verify_audio_match``. On a match the
    ``album_track`` dict is annotated IN PLACE with ``confidence`` and
    ``migration_status``; when the status is "eligible" it additionally gets
    ``single_file_path`` and ``album_file_path``.

    Args:
        base_url: Passed through to the Lidarr helper calls.
        headers: Passed through to the Lidarr helper calls.
        track_id: Id of the single's track.
        track_file_id: Track-file id of the single's track.
        album_track: Candidate dict; must contain ``track_id``,
            ``track_file_id`` and ``album_title``. It is mutated on a match.
        docker_mount: Passed through to ``verify_audio_match`` unchanged.
        single_file_path: Path of the single's file, or a falsy value when
            unknown (which makes the track ineligible for migration).

    Returns:
        ``(False, None, message, confidence)`` when the audio does not match,
        otherwise ``(True, album_track, message, confidence)`` where
        ``album_track`` is the same (now annotated) dict that was passed in.

    NOTE(review): the caller passes dicts taken from the shared album track
    map, so a later verification of the same album track overwrites the
    annotations written by an earlier one.
    """
    album_track_id = album_track["track_id"]
    album_track_file_id = album_track["track_file_id"]

    match, result_message, confidence = verify_audio_match(
        base_url,
        headers,
        track_id,
        track_file_id,
        album_track_id,
        album_track_file_id,
        docker_mount,
    )
    if not match:
        logger.debug(
            f"Audio mismatch: single trackFileId {track_file_id} does not match album '{album_track['album_title']}' trackFileId {album_track_file_id} (confidence: {confidence}/100)"
        )
        return False, None, result_message, confidence

    # The album file path is only needed to decide migration eligibility, so
    # the lookup is deferred until a match is confirmed; mismatches no longer
    # pay for a trackfile request whose result was thrown away.
    album_track_file_info = get_trackfile_info(base_url, album_track_file_id, headers)
    album_file_path = (
        album_track_file_info.get("path") if album_track_file_info else None
    )

    # Migration needs a confidence of at least 95 and both file paths.
    eligible = bool(confidence >= 95 and single_file_path and album_file_path)
    album_track["confidence"] = confidence
    album_track["migration_status"] = "eligible" if eligible else "not_eligible"
    if eligible:
        album_track["single_file_path"] = single_file_path
        album_track["album_file_path"] = album_file_path
    else:
        # The dict may already carry paths from an earlier "eligible" marking;
        # drop them so they cannot be mistaken for this verification's result.
        album_track.pop("single_file_path", None)
        album_track.pop("album_file_path", None)

    logger.debug(
        f"Audio match confirmed: single trackFileId {track_file_id} matches album '{album_track['album_title']}' trackFileId {album_track_file_id} (confidence: {confidence}/100)"
    )
    return True, album_track, result_message, confidence
def process_single_track(
    base_url,
    headers,
    album_id,
    artist_id,
    album_title,
    track,
    album_track_map,
    verify_audio,
    docker_mount,
) -> Optional[Dict]:
    """Process a single track and return duplicate info or None.

    Looks the track up in ``album_track_map`` by
    ``(artist_id, normalized title)``. When album tracks with that key exist,
    a duplicate record is built, optionally after verifying the audio of each
    candidate via ``verify_and_mark_album_track``.

    Args:
        base_url: Passed through to the Lidarr helper calls.
        headers: Passed through to the Lidarr helper calls.
        album_id: Id of the single release the track belongs to.
        artist_id: Artist id, first component of the lookup key.
        album_title: Title of the single release (used in the record/logs).
        track: Track dict; ``title``, ``id`` and ``trackFileId`` are read.
        album_track_map: Mapping produced by ``build_album_track_map``.
        verify_audio: When false, candidates are reported without checking.
        docker_mount: Passed through to verification unchanged.

    Returns:
        ``None`` when the track lacks a title/id/trackFileId or has no
        candidate album tracks; otherwise a dict with the keys ``artist_id``,
        ``single_album_id``, ``single_album_title``, ``track_title``,
        ``single_track_file_id``, ``duplicate_albums``, ``verified_albums``,
        ``verification_results`` and ``confidence_scores``. With verification
        on, the last two lists hold one entry per candidate, in candidate
        order.
    """
    title = track.get("title")
    track_id = track.get("id")
    track_file_id = track.get("trackFileId")
    if not (title and track_file_id and track_id):
        return None

    # A single .get() replaces the "in" test plus indexing; unlike indexing it
    # never inserts an empty entry into a defaultdict-backed map.
    duplicate_albums = album_track_map.get((artist_id, normalize_title(title)))
    if not duplicate_albums:
        return None

    if not verify_audio:
        return create_unverified_duplicate(
            artist_id, album_id, album_title, title, track_file_id, duplicate_albums
        )

    logger.debug(
        f"Verifying audio for '{title}' from single '{album_title}' against {len(duplicate_albums)} album track(s)..."
    )

    # The single's file path is the same for every candidate: fetch it once.
    single_track_file_info = get_trackfile_info(base_url, track_file_id, headers)
    single_file_path = (
        single_track_file_info.get("path") if single_track_file_info else None
    )

    # Each item is (match, album_track_or_None, result_message, confidence).
    verification_data = [
        verify_and_mark_album_track(
            base_url,
            headers,
            track_id,
            track_file_id,
            album_track,
            docker_mount,
            single_file_path,
        )
        for album_track in duplicate_albums
    ]

    return {
        "artist_id": artist_id,
        "single_album_id": album_id,
        "single_album_title": album_title,
        "track_title": title,
        "single_track_file_id": track_file_id,
        "duplicate_albums": duplicate_albums,
        "verified_albums": [
            verified
            for matched, verified, _, _ in verification_data
            if matched and verified
        ],
        "verification_results": [message for _, _, message, _ in verification_data],
        "confidence_scores": [score for _, _, _, score in verification_data],
    }
def process_single_album(
    base_url, headers, album, album_track_map, verify_audio, docker_mount
) -> List[Dict]:
    """Process a single album and return list of duplicates found.

    Fetches the release's tracks with ``fetch_tracks_for_album`` and runs
    ``process_single_track`` on every track that reports ``hasFile``.

    Args:
        base_url: Passed through to the Lidarr helper calls.
        headers: Passed through to the Lidarr helper calls.
        album: Album dict; ``id``, ``artistId`` and ``title`` are read.
        album_track_map: Mapping produced by ``build_album_track_map``.
        verify_audio: Passed through to ``process_single_track``.
        docker_mount: Passed through to ``process_single_track``.

    Returns:
        The non-``None`` results of ``process_single_track`` in track order.
        The list is empty when the album lacks an id or artist id, or when no
        tracks were returned for it.
    """
    album_id = album.get("id")
    artist_id = album.get("artistId")
    album_title = album.get("title", "")
    if not (album_id and artist_id):
        return []

    tracks = fetch_tracks_for_album(base_url, headers, album_id)
    if not tracks:
        logger.debug(
            f"Skipping single album '{album_title}' (albumId: {album_id}) - could not fetch tracks"
        )
        return []

    duplicates: List[Dict] = []
    for track in tracks:
        if not track.get("hasFile"):
            continue
        duplicate_info = process_single_track(
            base_url,
            headers,
            album_id,
            artist_id,
            album_title,
            track,
            album_track_map,
            verify_audio,
            docker_mount,
        )
        if duplicate_info is not None:
            duplicates.append(duplicate_info)
    return duplicates
def find_duplicate_singles(
    base_url: str,
    headers: Dict[str, str],
    albums: List[Dict],
    album_track_map: Dict[Tuple[int, str], List[Dict]],
    verify_audio: bool = True,
    docker_mount: Optional[str] = None,
) -> List[Dict]:
    """Identify single tracks that duplicate album tracks.

    Every entry of ``albums`` whose ``albumType`` equals "single"
    (case-insensitively) is handed to ``process_single_album``; the per-album
    results are concatenated in input order.

    Args:
        base_url: Passed through to ``process_single_album``.
        headers: Passed through to ``process_single_album``.
        albums: Album dicts; only ``albumType`` is read here.
        album_track_map: Mapping produced by ``build_album_track_map``.
        verify_audio: Whether candidates should be audio-verified.
        docker_mount: Passed through to ``process_single_album`` unchanged.

    Returns:
        A flat list of duplicate records, possibly empty.
    """
    duplicates: List[Dict] = []
    for album in albums:
        # ``albumType`` may be missing or explicitly null; both mean "not a
        # single" instead of raising AttributeError on ``None.lower()``.
        if (album.get("albumType") or "").lower() != "single":
            continue
        duplicates.extend(
            process_single_album(
                base_url, headers, album, album_track_map, verify_audio, docker_mount
            )
        )
    return duplicates