#!/usr/bin/env python3
"""
Analyze movies missing from Plex to identify common patterns.
"""

import json
from pathlib import Path
from collections import defaultdict
import re

# Compiled once at import time; each is applied to every filename in the report.
# Matches any character outside ASCII letters/digits, whitespace and - _ . ( ) [ ] { }.
_SPECIAL_CHAR_RE = re.compile(r'[^a-zA-Z0-9\s\-_\.\(\)\[\]\{\}]')
_YEAR_RE = re.compile(r'\((\d{4})\)')
_CURLY_RE = re.compile(r'\{[^}]+\}')
_SQUARE_RE = re.compile(r'\[[^\]]+\]')
_QUALITY_RE = re.compile(r'(1080p|2160p|720p|4K)', re.I)


def _by_count_desc(counts):
    """Return (key, count) pairs, largest count first; ties keep insertion order."""
    return sorted(counts.items(), key=lambda item: item[1], reverse=True)


def _pct(count, total):
    """Return count as a percentage of total."""
    return (count / total) * 100


def analyze_missing_movies(report_file='movies_comparison_report.json',
                           recent_year=2023):
    """Print a pattern analysis of the movies a report lists as missing from Plex.

    The report is a JSON object whose 'missing_from_plex' key maps a movie
    title to an object. Two fields of that object are read here: 'path'
    (file path of the movie) and 'in_jellyfin' (truthy when the movie is
    visible in Jellyfin). Everything is written to stdout.

    Args:
        report_file: Path of the JSON comparison report, read as UTF-8.
        recent_year: Movies whose filename carries a "(YYYY)" year greater
            than or equal to this value are listed as recent releases.

    Returns:
        None.

    Raises:
        OSError: If the report file cannot be opened.
        json.JSONDecodeError: If the report is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(report_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    missing = data.get('missing_from_plex', {})
    if not missing:
        print("No missing movies found!")
        return

    total = len(missing)
    print(f"Analyzing {total} movies missing from Plex...\n")

    extensions = defaultdict(int)
    naming_patterns = defaultdict(int)
    directory_names = defaultdict(int)
    path_depth = defaultdict(int)
    special_char_files = []
    edition_titles = []
    recent_releases = []
    imdb_tag_count = 0
    entries_without_path = 0

    for title, info in missing.items():
        raw_path = info.get('path')
        if not raw_path:
            # Nothing to inspect; the entry still counts toward the totals.
            entries_without_path += 1
            continue

        path = Path(raw_path)
        filename = path.name
        lowered = filename.lower()

        extensions[path.suffix.lower()] += 1

        if _SPECIAL_CHAR_RE.search(filename):
            special_char_files.append(filename)
        if '{edition-' in lowered:
            edition_titles.append(title)
        if '{imdb-' in lowered:
            imdb_tag_count += 1

        directory_names[path.parent.name] += 1

        # Depth is the number of path components below the 'Movies' folder.
        try:
            movies_idx = path.parts.index('Movies')
        except ValueError:
            # Path does not pass through a 'Movies' folder: leave it out of
            # the depth statistics rather than guessing a root.
            pass
        else:
            path_depth[len(path.parts) - movies_idx - 1] += 1

        year_match = _YEAR_RE.search(filename)
        if year_match:
            year = int(year_match.group(1))
            if year >= recent_year:
                recent_releases.append((title, year))

        # Common layout is "Movie (Year) {tags} - [quality]".
        if _CURLY_RE.search(filename):
            naming_patterns['has_curly_braces'] += 1
        if _SQUARE_RE.search(filename):
            naming_patterns['has_square_brackets'] += 1
        if year_match:
            naming_patterns['has_year'] += 1
        if _QUALITY_RE.search(filename):
            naming_patterns['has_quality'] += 1

    if entries_without_path:
        print(f"Note: {entries_without_path} entries have no 'path' "
              "and are excluded from filename statistics.\n")

    print("=" * 80)
    print("ANALYSIS RESULTS")
    print("=" * 80)

    print("\n📊 FILE EXTENSIONS:")
    for ext, count in _by_count_desc(extensions):
        print(f" {ext or 'no extension':15} {count:4} ({_pct(count, total):.1f}%)")

    print("\n🔤 NAMING PATTERNS:")
    for pattern, count in _by_count_desc(naming_patterns):
        print(f" {pattern:30} {count:4} ({_pct(count, total):.1f}%)")

    if edition_titles:
        print(f"\n🏷️ EDITION TAGS: {len(edition_titles)}")
        print(" Movies with {edition-...} tags")
        for title in edition_titles[:5]:
            print(f" • {title[:60]}")
        if len(edition_titles) > 5:
            print(f" ... and {len(edition_titles) - 5} more")

    if imdb_tag_count:
        print(f"\n🎬 IMDB TAGS: {imdb_tag_count}")
        print(" Movies with {imdb-...} tags")

    print("\n📁 PATH DEPTH FROM 'Movies' FOLDER:")
    for depth, count in sorted(path_depth.items()):
        print(f" Depth {depth}: {count:4} ({_pct(count, total):.1f}%)")

    if recent_releases:
        print(f"\n📅 RECENT RELEASES ({recent_year}+): {len(recent_releases)}")
        newest_first = sorted(recent_releases, key=lambda item: item[1], reverse=True)
        for title, year in newest_first[:10]:
            print(f" {year} - {title[:60]}")
        if len(recent_releases) > 10:
            print(f" ... and {len(recent_releases) - 10} more")

    if special_char_files:
        print(f"\n⚠️ SPECIAL CHARACTERS: {len(special_char_files)}")
        special_chars_found = set()
        for filename in special_char_files:
            special_chars_found.update(_SPECIAL_CHAR_RE.findall(filename))
        print(f" Characters found: {', '.join(repr(c) for c in sorted(special_chars_found))}")
        print(" Sample files:")
        for filename in special_char_files[:5]:
            print(f" • {filename[:75]}")

    # Parent directories that are empty-looking, blank, or very short.
    print("\n📂 UNUSUAL DIRECTORY PATTERNS:")
    unusual_dirs = [
        (dirname, count)
        for dirname, count in directory_names.items()
        if '()' in dirname or not dirname.strip() or len(dirname) < 3
    ]
    if unusual_dirs:
        for dirname, count in unusual_dirs[:10]:
            print(f" '{dirname}': {count} movies")
    else:
        print(" None found")

    print("\n📁 DIRECTORIES WITH MOST MISSING MOVIES:")
    for dirname, count in _by_count_desc(directory_names)[:10]:
        if count > 1:
            print(f" {dirname}: {count} movies")

    # An entry without the 'in_jellyfin' field counts as not in Jellyfin.
    in_jellyfin_count = sum(1 for info in missing.values() if info.get('in_jellyfin'))
    all_in_jellyfin = in_jellyfin_count == total

    print("\n🎥 JELLYFIN STATUS:")
    print(f" Movies also in Jellyfin: {in_jellyfin_count}/{total} "
          f"({_pct(in_jellyfin_count, total):.1f}%)")
    if all_in_jellyfin:
        print(" ✓ ALL missing movies are visible in Jellyfin")
        print(" → This suggests a Plex scanning/indexing issue, not file access")
    elif in_jellyfin_count > 0:
        print(f" ⚠ {total - in_jellyfin_count} movies not in Jellyfin either")
        print(" → These might have filesystem/permission issues")

    print("\n" + "=" * 80)
    print("RECOMMENDATIONS:")
    print("=" * 80)

    recommendations = []

    if naming_patterns['has_curly_braces'] > total * 0.5:
        recommendations.append(
            "• Many files use curly braces {} in filenames\n"
            " Plex might have trouble with certain special characters in names.\n"
            " Consider: Plex 'Plex Dance' or manual metadata matching for these."
        )

    if edition_titles:
        recommendations.append(
            f"• {len(edition_titles)} movies have {{edition-...}} tags\n"
            " Plex may not recognize edition tags in curly braces.\n"
            " Consider: Use Plex's edition support or move tags to square brackets."
        )

    if unusual_dirs:
        recommendations.append(
            "• Some movies are in directories with unusual names (empty, very short)\n"
            " Plex scanner might skip these.\n"
            " Consider: Rename directories to proper movie names."
        )

    if recent_releases:
        recommendations.append(
            f"• {len(recent_releases)} movies are from {recent_year} or later\n"
            " These might need manual metadata matching if too new for databases."
        )

    if all_in_jellyfin:
        recommendations.append(
            "• ALL missing movies are visible in Jellyfin\n"
            " This strongly suggests the issue is with Plex's scanner/matcher, not file access.\n"
            " Actions:\n"
            " 1. Force a full library refresh in Plex\n"
            " 2. Check Plex's 'Trash' for unmatched items\n"
            " 3. Try 'Scan Library Files' then 'Refresh All Metadata'\n"
            " 4. Check Plex server logs for scanner errors"
        )

    for i, rec in enumerate(recommendations, 1):
        print(f"\n{i}. {rec}")

    if not recommendations:
        print("\n• Files appear normal. Try forcing a Plex library refresh.")

    print("\n" + "=" * 80)


if __name__ == '__main__':
    analyze_missing_movies()