# lidarr-mb-gap/src-cleanup/duplicate_finder.py

"""Functions to find duplicate singles in Lidarr"""

import logging
from collections import defaultdict
from typing import Dict, List, Optional, Tuple

from lidarr_client import fetch_tracks_for_album, get_trackfile_info
from track_verification import verify_audio_match

logger = logging.getLogger(__name__)


def normalize_title(title: str) -> str:
    """Return ``title`` lower-cased with whitespace runs collapsed.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace becomes a single space, so titles that differ only in case
    or spacing produce the same string.
    """
    words = title.lower().split()
    return " ".join(words)


def build_album_track_map(
    base_url: str, headers: Dict[str, str], albums: List[Dict]
) -> Dict[Tuple[int, str], List[Dict]]:
    """Create a mapping of tracks present on full albums.

    Only entries of ``albums`` whose ``albumType`` equals "album"
    (case-insensitive) are considered. A missing or ``None`` ``albumType``
    is treated as "not an album" instead of raising. Albums without a
    truthy ``id`` and ``artistId`` are skipped, as are albums for which
    ``fetch_tracks_for_album`` returns nothing.

    Args:
        base_url: Passed through to ``fetch_tracks_for_album``.
        headers: Passed through to ``fetch_tracks_for_album``.
        albums: Album dicts; ``id``, ``artistId``, ``title`` and
            ``albumType`` are read from each.

    Returns:
        A ``defaultdict(list)`` keyed by ``(artist_id, normalized_title)``.
        Each value lists one dict per matching album track with the keys
        ``album_id``, ``album_title``, ``track_id`` and ``track_file_id``.
        Only tracks with a truthy ``hasFile``, ``title``, ``id`` and
        ``trackFileId`` are recorded.
    """
    album_track_map: Dict[Tuple[int, str], List[Dict]] = defaultdict(list)

    for album in albums:
        # `or ""` covers an explicit None value, which the .get() default
        # does not (the default only applies when the key is absent).
        if (album.get("albumType") or "").lower() != "album":
            continue

        album_id = album.get("id")
        artist_id = album.get("artistId")
        album_title = album.get("title", "Unknown")
        if not (album_id and artist_id):
            continue

        tracks = fetch_tracks_for_album(base_url, headers, album_id)
        if not tracks:
            logger.debug(
                f"Skipping album '{album_title}' (albumId: {album_id}) - could not fetch tracks"
            )
            continue

        for track in tracks:
            if not track.get("hasFile"):
                continue

            title = track.get("title")
            track_id = track.get("id")
            track_file_id = track.get("trackFileId")
            if not (title and track_file_id and track_id):
                continue

            key = (artist_id, normalize_title(title))
            album_track_map[key].append(
                {
                    "album_id": album_id,
                    "album_title": album_title,
                    "track_id": track_id,
                    "track_file_id": track_file_id,
                }
            )

    return album_track_map


def create_unverified_duplicate(
    artist_id, album_id, album_title, title, track_file_id, duplicate_albums
) -> Dict:
    """Build the duplicate record used when audio verification is skipped.

    Every candidate in ``duplicate_albums`` is reported as verified: the
    same list object is stored under both ``duplicate_albums`` and
    ``verified_albums``. The verification fields carry a single
    placeholder message and a single confidence score of 0.
    """
    entry: Dict = {}
    entry["artist_id"] = artist_id
    entry["single_album_id"] = album_id
    entry["single_album_title"] = album_title
    entry["track_title"] = title
    entry["single_track_file_id"] = track_file_id
    # No verification ran, so the candidate list doubles as the verified list.
    entry["duplicate_albums"] = duplicate_albums
    entry["verified_albums"] = duplicate_albums
    entry["verification_results"] = ["Audio verification disabled"]
    entry["confidence_scores"] = [0]
    return entry


def verify_and_mark_album_track(
    base_url,
    headers,
    track_id,
    track_file_id,
    album_track,
    docker_mount,
    single_file_path,
) -> Tuple[bool, Optional[Dict], str, int]:
    """Verify an album track against a single and annotate it on a match.

    ``verify_audio_match`` decides whether the single's track file and the
    album track's file match. On a match, a shallow copy of ``album_track``
    is annotated with ``confidence`` and ``migration_status``, plus
    ``single_file_path`` and ``album_file_path`` when the status is
    "eligible". The status is "eligible" only when the confidence is at
    least 95 and both file paths are known.

    The input ``album_track`` is not modified. The same dict may be
    verified against several singles, so annotating it in place would let
    a later verification overwrite results that were already returned.

    Args:
        base_url: Passed to ``verify_audio_match`` and ``get_trackfile_info``.
        headers: Passed to ``verify_audio_match`` and ``get_trackfile_info``.
        track_id: Track id of the single's track.
        track_file_id: Track file id of the single's track.
        album_track: Dict with at least ``track_id``, ``track_file_id`` and
            ``album_title``.
        docker_mount: Passed through to ``verify_audio_match``.
        single_file_path: Path of the single's file, or a falsy value when
            unknown (which makes the track ineligible for migration).

    Returns:
        ``(matched, annotated_track, result_message, confidence)``;
        ``annotated_track`` is ``None`` when the audio does not match.
    """
    album_track_id = album_track["track_id"]
    album_track_file_id = album_track["track_file_id"]

    match, result_message, confidence = verify_audio_match(
        base_url,
        headers,
        track_id,
        track_file_id,
        album_track_id,
        album_track_file_id,
        docker_mount,
    )

    if not match:
        logger.debug(
            f"Audio mismatch: single trackFileId {track_file_id} does not match album '{album_track['album_title']}' trackFileId {album_track_file_id} (confidence: {confidence}/100)"
        )
        return False, None, result_message, confidence

    # The album file path only matters for a confirmed match, so it is
    # looked up after the mismatch return above.
    album_track_file_info = get_trackfile_info(base_url, album_track_file_id, headers)
    album_file_path = (
        album_track_file_info.get("path") if album_track_file_info else None
    )

    # Annotate a copy so the shared entry stays free of per-single state.
    marked_track = dict(album_track)
    marked_track["confidence"] = confidence

    eligible = bool(confidence >= 95 and single_file_path and album_file_path)
    marked_track["migration_status"] = "eligible" if eligible else "not_eligible"
    if eligible:
        marked_track["single_file_path"] = single_file_path
        marked_track["album_file_path"] = album_file_path

    logger.debug(
        f"Audio match confirmed: single trackFileId {track_file_id} matches album '{album_track['album_title']}' trackFileId {album_track_file_id} (confidence: {confidence}/100)"
    )
    return True, marked_track, result_message, confidence


def process_single_track(
    base_url,
    headers,
    album_id,
    artist_id,
    album_title,
    track,
    album_track_map,
    verify_audio,
    docker_mount,
) -> Optional[Dict]:
    """Return a duplicate record for one track of a single, or ``None``.

    ``None`` is returned when the track lacks a title, id or track file id,
    or when ``album_track_map`` has no candidates for
    ``(artist_id, normalized title)``. With ``verify_audio`` false, the
    record comes from ``create_unverified_duplicate``. Otherwise each
    candidate goes through ``verify_and_mark_album_track`` and the record
    lists the matched tracks along with one result message and one
    confidence score per candidate, in candidate order.
    """
    title = track.get("title")
    track_id = track.get("id")
    track_file_id = track.get("trackFileId")
    if not title or not track_file_id or not track_id:
        return None

    # .get() neither raises on a missing key nor inserts one.
    duplicate_albums = album_track_map.get((artist_id, normalize_title(title)))
    if not duplicate_albums:
        return None

    if not verify_audio:
        return create_unverified_duplicate(
            artist_id, album_id, album_title, title, track_file_id, duplicate_albums
        )

    logger.debug(
        f"Verifying audio for '{title}' from single '{album_title}' against {len(duplicate_albums)} album track(s)..."
    )

    single_info = get_trackfile_info(base_url, track_file_id, headers)
    single_file_path = single_info.get("path") if single_info else None

    verified_albums = []
    verification_results = []
    confidence_scores = []
    for candidate in duplicate_albums:
        matched, marked, message, score = verify_and_mark_album_track(
            base_url,
            headers,
            track_id,
            track_file_id,
            candidate,
            docker_mount,
            single_file_path,
        )
        # Messages and scores are kept for every candidate, matched or not.
        verification_results.append(message)
        confidence_scores.append(score)
        if matched and marked:
            verified_albums.append(marked)

    return {
        "artist_id": artist_id,
        "single_album_id": album_id,
        "single_album_title": album_title,
        "track_title": title,
        "single_track_file_id": track_file_id,
        "duplicate_albums": duplicate_albums,
        "verified_albums": verified_albums,
        "verification_results": verification_results,
        "confidence_scores": confidence_scores,
    }


def process_single_album(
    base_url, headers, album, album_track_map, verify_audio, docker_mount
) -> List[Dict]:
    """Collect the duplicate records for every track of one single.

    Returns an empty list when the album has no truthy ``id`` and
    ``artistId``, or when ``fetch_tracks_for_album`` yields nothing. Only
    tracks with a truthy ``hasFile`` are passed to
    ``process_single_track``, and its ``None`` results are dropped.
    """
    album_id = album.get("id")
    artist_id = album.get("artistId")
    album_title = album.get("title", "")
    if not album_id or not artist_id:
        return []

    tracks = fetch_tracks_for_album(base_url, headers, album_id)
    if not tracks:
        logger.debug(
            f"Skipping single album '{album_title}' (albumId: {album_id}) - could not fetch tracks"
        )
        return []

    duplicates: List[Dict] = []
    for track in tracks:
        if not track.get("hasFile"):
            continue
        duplicate_info = process_single_track(
            base_url,
            headers,
            album_id,
            artist_id,
            album_title,
            track,
            album_track_map,
            verify_audio,
            docker_mount,
        )
        if duplicate_info is not None:
            duplicates.append(duplicate_info)
    return duplicates


def find_duplicate_singles(
    base_url: str,
    headers: Dict[str, str],
    albums: List[Dict],
    album_track_map: Dict[Tuple[int, str], List[Dict]],
    verify_audio: bool = True,
    docker_mount: Optional[str] = None,
) -> List[Dict]:
    """Identify single tracks that duplicate album tracks.

    Only entries of ``albums`` whose ``albumType`` equals "single"
    (case-insensitive) are processed. A missing or ``None`` ``albumType``
    is treated as "not a single" instead of raising.

    Args:
        base_url: Passed through to ``process_single_album``.
        headers: Passed through to ``process_single_album``.
        albums: Album dicts; ``albumType`` is read here, the remaining
            fields by ``process_single_album``.
        album_track_map: Candidate album tracks keyed by
            ``(artist_id, normalized_title)``.
        verify_audio: Passed through to ``process_single_album``.
        docker_mount: Passed through to ``process_single_album``.

    Returns:
        The duplicate records from all singles as one flat list, in album
        order.
    """
    duplicates: List[Dict] = []
    for album in albums:
        # `or ""` covers an explicit None value, which the .get() default
        # does not (the default only applies when the key is absent).
        if (album.get("albumType") or "").lower() != "single":
            continue
        duplicates.extend(
            process_single_album(
                base_url, headers, album, album_track_map, verify_audio, docker_mount
            )
        )
    return duplicates