"""Audio verification using multiple methods"""

import json
import logging
import os
import subprocess
from difflib import SequenceMatcher
from typing import Dict, List, Optional, Tuple, Union

logger = logging.getLogger(__name__)


def map_docker_path(file_path: str, docker_mount: Optional[str] = None) -> str:
    """Map a Docker container path to the corresponding host path.

    Args:
        file_path: Path as seen inside the container.
        docker_mount: Mount specification of the form
            ``"<container_path>:<host_path>"``. When empty or ``None`` no
            mapping is performed.

    Returns:
        ``file_path`` with its leading ``container_path`` replaced by
        ``host_path`` when the file lives under the mount; otherwise
        ``file_path`` unchanged. A mount specification without a ``:`` is
        ignored (with a warning) rather than raising.
    """
    if not docker_mount:
        return file_path

    # partition() never raises, unlike unpacking split(":", 1) on a spec
    # that has no colon. Only the first colon separates the two halves.
    container_path, separator, host_path = docker_mount.partition(":")
    if not separator:
        logger.warning(f"Ignoring malformed docker mount: {docker_mount}")
        return file_path

    if file_path == container_path:
        return host_path

    # Require a path-component boundary so that a mount of "/music" does not
    # capture an unrelated sibling such as "/music2/track.flac".
    if container_path.endswith("/"):
        prefix = container_path
    else:
        prefix = container_path + "/"
    if not file_path.startswith(prefix):
        return file_path

    return host_path + file_path[len(container_path):]


def get_audio_fingerprint(
    file_path: str, docker_mount: Optional[str] = None
) -> Optional[Tuple[str, float]]:
    """Generate an audio fingerprint using fpcalc.

    Args:
        file_path: Path to the audio file (container path if ``docker_mount``
            is given).
        docker_mount: Optional ``"<container_path>:<host_path>"`` mapping
            applied to ``file_path`` before the file is accessed.

    Returns:
        ``(fingerprint, duration)`` as reported by fpcalc's JSON output, or
        ``None`` if the file does not exist, fpcalc fails or times out, or
        the output lacks either field.
    """
    mapped_path = map_docker_path(file_path, docker_mount)
    logger.debug(f"Generating fingerprint for: {mapped_path}")

    if not os.path.exists(mapped_path):
        logger.warning(f"File not found: {mapped_path}")
        return None

    try:
        result = subprocess.run(
            ["fpcalc", "-json", "-length", "180", mapped_path],
            capture_output=True,
            text=True,
            timeout=60,
            check=False,
        )
        if result.returncode != 0:
            logger.warning(f"fpcalc failed for {mapped_path}: {result.stderr}")
            return None

        data = json.loads(result.stdout)
        fingerprint = data.get("fingerprint")
        duration = data.get("duration")
        if not fingerprint or duration is None:
            logger.warning(
                f"fpcalc output missing fingerprint or duration for {mapped_path}"
            )
            return None

        logger.debug(f"Successfully generated fingerprint (duration: {duration}s)")
        return fingerprint, duration
    except Exception as e:
        # Deliberately best-effort: a timeout, a missing fpcalc binary,
        # malformed JSON or any other failure all mean "no fingerprint".
        logger.warning(f"Error generating fingerprint for {mapped_path}: {e}")
        return None


def get_file_properties(
    file_path: str, docker_mount: Optional[str] = None
) -> Optional[Dict]:
    """Get audio file properties using ffprobe.

    Args:
        file_path: Path to the audio file (container path if ``docker_mount``
            is given).
        docker_mount: Optional ``"<container_path>:<host_path>"`` mapping
            applied to ``file_path`` before the file is accessed.

    Returns:
        A dict with keys ``duration``, ``size``, ``bitrate``, ``sample_rate``,
        ``channels``, ``codec`` and ``bit_depth`` (numeric fields default to
        0 and ``codec`` to ``""`` when ffprobe does not report them), or
        ``None`` if the file is missing, ffprobe fails, no audio stream is
        found, or the output cannot be parsed.
    """
    mapped_path = map_docker_path(file_path, docker_mount)
    if not os.path.exists(mapped_path):
        return None

    try:
        result = subprocess.run(
            [
                "ffprobe",
                "-v",
                "quiet",
                "-print_format",
                "json",
                "-show_format",
                "-show_streams",
                mapped_path,
            ],
            capture_output=True,
            text=True,
            timeout=30,
        )
        if result.returncode != 0:
            return None

        data = json.loads(result.stdout)
        # Only the first audio stream is inspected.
        audio_stream = next(
            (s for s in data.get("streams", []) if s.get("codec_type") == "audio"),
            None,
        )
        format_info = data.get("format", {})
        if not audio_stream:
            return None

        return {
            "duration": float(format_info.get("duration", 0)),
            "size": int(format_info.get("size", 0)),
            "bitrate": int(format_info.get("bit_rate", 0)),
            "sample_rate": int(audio_stream.get("sample_rate", 0)),
            "channels": int(audio_stream.get("channels", 0)),
            "codec": audio_stream.get("codec_name", ""),
            "bit_depth": int(audio_stream.get("bits_per_raw_sample", 0)),
        }
    except Exception as e:
        # Best-effort: any failure simply means "properties unavailable".
        logger.debug(f"Could not get file properties for {mapped_path}: {e}")
        return None


def _format_context(log_context: Optional[str]) -> str:
    """Format log context message"""
    return f" ({log_context})" if log_context else ""


def compare_fingerprints(
    fp1_data: Optional[Tuple[str, float]],
    fp2_data: Optional[Tuple[str, float]],
    log_context: Optional[str] = None,
    return_message: bool = False,
) -> Union[bool, Tuple[bool, str]]:
    """Compare two audio fingerprints.

    Args:
        fp1_data: ``(fingerprint, duration)`` for the first file, or ``None``.
        fp2_data: ``(fingerprint, duration)`` for the second file, or ``None``.
        log_context: Optional text appended in parentheses to debug log lines.
        return_message: When true, return ``(match, message)`` instead of
            just ``match``.

    Returns:
        ``match`` (bool), or ``(match, message)`` if ``return_message`` is
        true. Fingerprints never match when either is missing or when the
        durations differ by more than 5 seconds. Otherwise they match when
        identical or when their sequence similarity reaches a threshold that
        tightens as the duration difference grows.
    """

    def _result(match: bool, message: str) -> Union[bool, Tuple[bool, str]]:
        # Single exit point: log the outcome and shape the return value.
        logger.debug(f"{message}{_format_context(log_context)}")
        return (match, message) if return_message else match

    if not fp1_data or not fp2_data:
        return _result(False, "Fingerprint comparison failed: missing fingerprint")

    fp1, duration1 = fp1_data
    fp2, duration2 = fp2_data
    duration_diff = abs(duration1 - duration2)

    if duration_diff > 5:
        return _result(
            False,
            f"Fingerprint comparison: duration mismatch ({duration1}s vs {duration2}s, diff: {duration_diff}s)",
        )

    if fp1 == fp2:
        return _result(True, "Fingerprint comparison: exact match")

    try:
        # autojunk must be disabled: for sequences of 200+ items the default
        # heuristic discards every element that occurs in more than ~1% of
        # positions. In a long fingerprint string over a small alphabet that
        # is every character, which drives the ratio of near-identical
        # fingerprints towards 0.
        similarity = SequenceMatcher(None, fp1, fp2, autojunk=False).ratio()
    except Exception as e:
        return _result(
            False,
            f"Fingerprint comparison failed: exception {type(e).__name__}: {str(e)}",
        )

    # The further apart the durations, the more similar the fingerprints
    # must be before we accept them as the same recording.
    if duration_diff <= 1:
        threshold = 0.90
    elif duration_diff <= 3:
        threshold = 0.93
    else:
        threshold = 0.95

    match = similarity >= threshold
    return _result(
        match,
        f"Fingerprint comparison: similarity={similarity:.3f}, duration_diff={duration_diff}s, threshold={threshold:.2f}, match={match}",
    )


def check_mb_recording_id(single_track_info, album_track_info) -> Tuple[int, str]:
    """Check MusicBrainz Recording ID match.

    Both arguments are mappings read via ``foreignRecordingId``; either may
    be ``None``/empty.

    Returns:
        ``(score_delta, message)``: +50 when the IDs are equal, -30 when they
        differ, and 0 when either ID is unavailable.
    """
    unavailable = (0, "⚠ MusicBrainz Recording ID unavailable")
    if not (single_track_info and album_track_info):
        return unavailable

    single_mb_id = single_track_info.get("foreignRecordingId")
    album_mb_id = album_track_info.get("foreignRecordingId")
    if not (single_mb_id and album_mb_id):
        return unavailable

    if single_mb_id == album_mb_id:
        return 50, "✓ MusicBrainz Recording ID match (+50)"
    return -30, "✗ Different MusicBrainz Recording IDs (-30)"


def _quality_name(file_info) -> str:
    """Return ``file_info["quality"]["quality"]["name"]`` or ``""`` if absent."""
    if not file_info:
        return ""
    # ``or {}`` also covers keys that are present but explicitly None.
    outer = file_info.get("quality") or {}
    inner = outer.get("quality") or {}
    return inner.get("name") or ""


def check_quality_profile(
    single_file_info, album_file_info
) -> Tuple[int, Optional[str]]:
    """Check Lidarr quality profile match.

    Both arguments are mappings holding the quality name under
    ``quality -> quality -> name``; either may be ``None``/empty.

    Returns:
        ``(score_delta, message)``: +10 when both quality names are equal,
        0 with a warning message when they differ, and ``(0, None)`` when
        either name is unavailable.
    """
    single_quality = _quality_name(single_file_info)
    album_quality = _quality_name(album_file_info)

    if not (single_quality and album_quality):
        return 0, None
    if single_quality == album_quality:
        return 10, f"✓ Same quality ({single_quality}) (+10)"
    return 0, f"⚠ Different quality ({single_quality} vs {album_quality})"


def check_file_properties(single_props, album_props) -> List[Tuple[int, str]]:
    """Check file properties.

    Args:
        single_props: Property dict for the single's file (the shape returned
            by ``get_file_properties``), or ``None``.
        album_props: Property dict for the album track's file, or ``None``.

    Returns:
        A list of ``(score_delta, message)`` tuples, one per comparison that
        could be made (duration, size, bitrate, sample rate, codec, channels,
        bit depth). Empty when either property dict is missing.
    """
    if not (single_props and album_props):
        return []

    results: List[Tuple[int, str]] = []

    # Duration
    duration_diff = abs(single_props["duration"] - album_props["duration"])
    if duration_diff <= 1:
        results.append((15, f"✓ Duration match ({duration_diff:.1f}s diff) (+15)"))
    elif duration_diff <= 3:
        results.append((5, f"⚠ Close duration ({duration_diff:.1f}s diff) (+5)"))
    else:
        results.append((-10, f"✗ Duration mismatch ({duration_diff:.1f}s diff) (-10)"))

    # File size. A size of 0 means "not reported"; when neither file has a
    # known size there is nothing to compare (and the ratio would divide by
    # zero), so the check is skipped.
    larger_size = max(single_props["size"], album_props["size"])
    if larger_size > 0:
        size_ratio = min(single_props["size"], album_props["size"]) / larger_size
        if size_ratio >= 0.95:
            results.append((15, f"✓ File size match ({size_ratio:.2%}) (+15)"))
        elif size_ratio >= 0.85:
            results.append((5, f"⚠ Similar file size ({size_ratio:.2%}) (+5)"))
        else:
            results.append((0, f"⚠ Different file sizes ({size_ratio:.2%})"))

    # Bitrate (only when both are known)
    if single_props["bitrate"] > 0 and album_props["bitrate"] > 0:
        bitrate_ratio = min(single_props["bitrate"], album_props["bitrate"]) / max(
            single_props["bitrate"], album_props["bitrate"]
        )
        if bitrate_ratio >= 0.90:
            results.append((10, f"✓ Bitrate match ({bitrate_ratio:.2%}) (+10)"))

    # Sample rate
    if single_props["sample_rate"] == album_props["sample_rate"]:
        results.append(
            (5, f"✓ Sample rate match ({single_props['sample_rate']}Hz) (+5)")
        )

    # Codec (only when both are known)
    if single_props["codec"] and album_props["codec"]:
        if single_props["codec"] == album_props["codec"]:
            results.append((5, f"✓ Same codec ({single_props['codec']}) (+5)"))
        else:
            results.append(
                (
                    0,
                    f"⚠ Different codecs ({single_props['codec']} vs {album_props['codec']})",
                )
            )

    # Channels
    if single_props["channels"] == album_props["channels"]:
        results.append((5, f"✓ Same channels ({single_props['channels']}) (+5)"))
    else:
        results.append(
            (
                0,
                f"⚠ Different channels ({single_props['channels']} vs {album_props['channels']})",
            )
        )

    # Bit depth (only when both are known)
    if single_props["bit_depth"] > 0 and album_props["bit_depth"] > 0:
        if single_props["bit_depth"] == album_props["bit_depth"]:
            results.append(
                (5, f"✓ Same bit depth ({single_props['bit_depth']}-bit) (+5)")
            )
        else:
            results.append(
                (
                    0,
                    f"⚠ Different bit depths ({single_props['bit_depth']}-bit vs {album_props['bit_depth']}-bit)",
                )
            )

    return results