Add audio verification and duplicate tracking features
- Integrated `plexapi` and `python-dotenv` as dependencies in `flake.nix` and `pyproject.toml` for enhanced functionality.
- Implemented new modules for audio verification and duplicate tracking, including `audio_verification.py`, `duplicate_finder.py`, and `track_verification.py`.
- Updated `main.py` to utilize the new modules for identifying and managing duplicate single tracks in Lidarr, with detailed logging and confidence scoring.
- Enhanced the `find_duplicate_singles` function to support audio verification results and metadata migration to Plex.
- Refactored existing code for improved structure and maintainability, ensuring better integration of new features.
This commit is contained in:
267
src-cleanup/duplicate_finder.py
Normal file
267
src-cleanup/duplicate_finder.py
Normal file
@@ -0,0 +1,267 @@
|
||||
"""Functions to find duplicate singles in Lidarr"""
|
||||
|
||||
import logging
|
||||
from collections import defaultdict
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from lidarr_client import fetch_tracks_for_album, get_trackfile_info
|
||||
from track_verification import verify_audio_match
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def normalize_title(title: str) -> str:
    """Normalize a track title for case- and whitespace-insensitive comparison.

    The title is case-folded and every run of whitespace (including leading
    and trailing whitespace) is collapsed to a single space, so that
    ``"  Hello   World "`` and ``"hello world"`` normalize to the same string.

    ``str.casefold`` is used instead of ``str.lower`` so that titles that
    differ only by Unicode case variants (for example ``"Straße"`` and
    ``"STRASSE"``) also normalize to the same string. For plain ASCII titles
    the result is identical to lower-casing.

    Args:
        title: The raw track title.

    Returns:
        The normalized title; an empty string if ``title`` is empty or
        consists only of whitespace.
    """
    return " ".join(title.casefold().split())
|
||||
|
||||
|
||||
def build_album_track_map(
    base_url: str, headers: Dict[str, str], albums: List[Dict]
) -> Dict[Tuple[int, str], List[Dict]]:
    """Create a mapping of tracks present on full albums.

    Only entries of ``albums`` whose ``albumType`` equals ``"album"``
    (compared case-insensitively) are considered. For each such album the
    tracks are fetched with ``fetch_tracks_for_album`` and every track that
    has a file (``hasFile`` truthy) plus a title, an id and a ``trackFileId``
    is recorded.

    Args:
        base_url: Passed through to ``fetch_tracks_for_album``.
        headers: Passed through to ``fetch_tracks_for_album``.
        albums: Album dictionaries; the keys read here are ``albumType``,
            ``id``, ``artistId`` and ``title``.

    Returns:
        A ``defaultdict(list)`` keyed by ``(artist_id, normalized_title)``.
        Each value is a list of dictionaries with the keys ``album_id``,
        ``album_title``, ``track_id`` and ``track_file_id``. Because the
        result is a ``defaultdict``, test membership with ``in`` or ``.get``
        rather than indexing, which would insert empty entries.
    """
    album_track_map: Dict[Tuple[int, str], List[Dict]] = defaultdict(list)

    for album in albums:
        # ``albumType`` may be missing or explicitly null; ``or ""`` keeps
        # ``.lower()`` from raising AttributeError on None.
        if (album.get("albumType") or "").lower() != "album":
            continue

        album_id = album.get("id")
        artist_id = album.get("artistId")
        album_title = album.get("title", "Unknown")

        # Without both ids the album can neither be fetched nor keyed.
        if not (album_id and artist_id):
            continue

        tracks = fetch_tracks_for_album(base_url, headers, album_id)
        if not tracks:
            logger.debug(
                f"Skipping album '{album_title}' (albumId: {album_id}) - could not fetch tracks"
            )
            continue

        for track in tracks:
            if not track.get("hasFile"):
                continue

            title = track.get("title")
            track_id = track.get("id")
            track_file_id = track.get("trackFileId")

            if not (title and track_file_id and track_id):
                continue

            key = (artist_id, normalize_title(title))
            album_track_map[key].append(
                {
                    "album_id": album_id,
                    "album_title": album_title,
                    "track_id": track_id,
                    "track_file_id": track_file_id,
                }
            )

    return album_track_map
|
||||
|
||||
|
||||
def create_unverified_duplicate(
    artist_id, album_id, album_title, title, track_file_id, duplicate_albums
) -> Dict:
    """Create duplicate entry for unverified tracks.

    Builds the result dictionary used when audio verification is disabled:
    every candidate album track is treated as verified, with a single
    placeholder verification message and a single confidence score of ``0``.

    Args:
        artist_id: Stored under ``artist_id``.
        album_id: Id of the single's album, stored under ``single_album_id``.
        album_title: Title of the single's album, stored under
            ``single_album_title``.
        title: Track title, stored under ``track_title``.
        track_file_id: Track file id of the single, stored under
            ``single_track_file_id``.
        duplicate_albums: Candidate album-track entries for this title.

    Returns:
        A dictionary with the keys ``artist_id``, ``single_album_id``,
        ``single_album_title``, ``track_title``, ``single_track_file_id``,
        ``duplicate_albums``, ``verified_albums``, ``verification_results``
        and ``confidence_scores``.
    """
    return {
        "artist_id": artist_id,
        "single_album_id": album_id,
        "single_album_title": album_title,
        "track_title": title,
        "single_track_file_id": track_file_id,
        "duplicate_albums": duplicate_albums,
        # Shallow copy: keeps "verified_albums" from aliasing
        # "duplicate_albums" (and the caller's list), so filtering or
        # appending to one does not silently change the other.
        "verified_albums": list(duplicate_albums),
        "verification_results": ["Audio verification disabled"],
        "confidence_scores": [0],
    }
|
||||
|
||||
|
||||
def verify_and_mark_album_track(
    base_url,
    headers,
    track_id,
    track_file_id,
    album_track,
    docker_mount,
    single_file_path,
) -> Tuple[bool, Optional[Dict], str, int]:
    """Verify album track and mark for migration if perfect match.

    Compares the single's track against one candidate album track using
    ``verify_audio_match``. On a match, a *copy* of ``album_track`` is
    annotated with the confidence and a migration status and returned.

    ``album_track`` itself is never modified. The same candidate dictionary
    may be checked against several singles, and annotating it in place would
    let a later verification overwrite the confidence, status and file paths
    stored in results that were already returned.

    Args:
        base_url: Passed through to ``verify_audio_match`` and
            ``get_trackfile_info``.
        headers: Passed through to ``verify_audio_match`` and
            ``get_trackfile_info``.
        track_id: Id of the single's track.
        track_file_id: Track file id of the single's track.
        album_track: Candidate entry; must contain ``track_id``,
            ``track_file_id`` and ``album_title``.
        docker_mount: Passed through to ``verify_audio_match``.
        single_file_path: File path of the single's track file, or a falsy
            value if unknown.

    Returns:
        ``(matched, annotated_track, result_message, confidence)``.
        ``annotated_track`` is ``None`` when the audio does not match;
        otherwise it is a copy of ``album_track`` with ``confidence`` and
        ``migration_status`` (``"eligible"`` or ``"not_eligible"``) added,
        plus ``single_file_path`` and ``album_file_path`` when eligible.
    """
    album_track_id = album_track["track_id"]
    album_track_file_id = album_track["track_file_id"]

    match, result_message, confidence = verify_audio_match(
        base_url,
        headers,
        track_id,
        track_file_id,
        album_track_id,
        album_track_file_id,
        docker_mount,
    )

    if not match:
        logger.debug(
            f"Audio mismatch: single trackFileId {track_file_id} does not match album '{album_track['album_title']}' trackFileId {album_track_file_id} (confidence: {confidence}/100)"
        )
        return False, None, result_message, confidence

    # The album file path is only used for the eligibility decision below,
    # so it is looked up after the mismatch early return.
    album_track_file_info = get_trackfile_info(base_url, album_track_file_id, headers)
    album_file_path = (
        album_track_file_info.get("path") if album_track_file_info else None
    )

    # Migration requires a confidence of at least 95 and both file paths.
    eligible = bool(confidence >= 95 and single_file_path and album_file_path)

    # Annotate a shallow copy so the caller's candidate entry stays pristine.
    marked_track = dict(album_track)
    marked_track["confidence"] = confidence
    marked_track["migration_status"] = "eligible" if eligible else "not_eligible"
    if eligible:
        marked_track["single_file_path"] = single_file_path
        marked_track["album_file_path"] = album_file_path

    logger.debug(
        f"Audio match confirmed: single trackFileId {track_file_id} matches album '{album_track['album_title']}' trackFileId {album_track_file_id} (confidence: {confidence}/100)"
    )
    return True, marked_track, result_message, confidence
|
||||
|
||||
|
||||
def process_single_track(
    base_url,
    headers,
    album_id,
    artist_id,
    album_title,
    track,
    album_track_map,
    verify_audio,
    docker_mount,
) -> Optional[Dict]:
    """Check one track of a single against the album track map.

    Returns ``None`` when the track lacks a title, id or ``trackFileId``, or
    when no album track with the same artist and normalized title is known.
    Otherwise returns a dictionary describing the duplicate. With
    ``verify_audio`` falsy the entry comes from
    ``create_unverified_duplicate``; otherwise every candidate is checked
    with ``verify_and_mark_album_track`` and the per-candidate messages and
    confidence scores are collected in candidate order.
    """
    title = track.get("title")
    track_id = track.get("id")
    track_file_id = track.get("trackFileId")
    if not title or not track_file_id or not track_id:
        return None

    # A missing key and an empty candidate list are both "no duplicate".
    duplicate_albums = album_track_map.get((artist_id, normalize_title(title)))
    if not duplicate_albums:
        return None

    if not verify_audio:
        return create_unverified_duplicate(
            artist_id, album_id, album_title, title, track_file_id, duplicate_albums
        )

    logger.debug(
        f"Verifying audio for '{title}' from single '{album_title}' against {len(duplicate_albums)} album track(s)..."
    )

    single_file_path = None
    single_info = get_trackfile_info(base_url, track_file_id, headers)
    if single_info:
        single_file_path = single_info.get("path")

    outcomes = [
        verify_and_mark_album_track(
            base_url,
            headers,
            track_id,
            track_file_id,
            candidate,
            docker_mount,
            single_file_path,
        )
        for candidate in duplicate_albums
    ]

    verified_albums = []
    verification_results = []
    confidence_scores = []
    for matched, marked_track, message, score in outcomes:
        # Messages and scores are kept for every candidate; the track entry
        # only for confirmed matches.
        if matched and marked_track:
            verified_albums.append(marked_track)
        verification_results.append(message)
        confidence_scores.append(score)

    return {
        "artist_id": artist_id,
        "single_album_id": album_id,
        "single_album_title": album_title,
        "track_title": title,
        "single_track_file_id": track_file_id,
        "duplicate_albums": duplicate_albums,
        "verified_albums": verified_albums,
        "verification_results": verification_results,
        "confidence_scores": confidence_scores,
    }
|
||||
|
||||
|
||||
def process_single_album(
    base_url, headers, album, album_track_map, verify_audio, docker_mount
) -> List[Dict]:
    """Collect duplicate entries for every track of one single release.

    Returns an empty list when the album has no ``id`` or ``artistId``, or
    when ``fetch_tracks_for_album`` yields nothing. Otherwise each track
    with a truthy ``hasFile`` is passed to ``process_single_track`` and the
    non-``None`` results are returned in track order.
    """
    album_id = album.get("id")
    artist_id = album.get("artistId")
    album_title = album.get("title", "")

    if not album_id or not artist_id:
        return []

    tracks = fetch_tracks_for_album(base_url, headers, album_id)
    if not tracks:
        logger.debug(
            f"Skipping single album '{album_title}' (albumId: {album_id}) - could not fetch tracks"
        )
        return []

    found = []
    for track in tracks:
        if not track.get("hasFile"):
            continue
        duplicate_info = process_single_track(
            base_url,
            headers,
            album_id,
            artist_id,
            album_title,
            track,
            album_track_map,
            verify_audio,
            docker_mount,
        )
        if duplicate_info is not None:
            found.append(duplicate_info)
    return found
|
||||
|
||||
|
||||
def find_duplicate_singles(
    base_url: str,
    headers: Dict[str, str],
    albums: List[Dict],
    album_track_map: Dict[Tuple[int, str], List[Dict]],
    verify_audio: bool = True,
    docker_mount: Optional[str] = None,
) -> List[Dict]:
    """Identify single tracks that duplicate album tracks.

    Only entries of ``albums`` whose ``albumType`` equals ``"single"``
    (compared case-insensitively) are examined; each is handed to
    ``process_single_album`` and the resulting duplicate entries are
    concatenated in album order.

    Args:
        base_url: Passed through to ``process_single_album``.
        headers: Passed through to ``process_single_album``.
        albums: Album dictionaries; only ``albumType`` is read here.
        album_track_map: Mapping of ``(artist_id, normalized_title)`` to
            album-track entries, as produced by ``build_album_track_map``.
        verify_audio: Passed through to ``process_single_album``.
        docker_mount: Passed through to ``process_single_album``.

    Returns:
        A flat list of duplicate dictionaries; empty if nothing was found.
    """
    duplicates: List[Dict] = []
    for album in albums:
        # ``albumType`` may be missing or explicitly null; ``or ""`` keeps
        # ``.lower()`` from raising AttributeError on None.
        if (album.get("albumType") or "").lower() != "single":
            continue
        duplicates.extend(
            process_single_album(
                base_url, headers, album, album_track_map, verify_audio, docker_mount
            )
        )
    return duplicates
|
||||
Reference in New Issue
Block a user