Files
plexfin-compare/analyze_movies.py
Danilo Reyes e772af13a7 Add initial project files for Jellyfin-Plex Library Checker
- Create .editorconfig for consistent coding styles.
- Add .envrc for direnv integration.
- Include .gitignore to exclude environment and build files.
- Implement compare_movies.py and analyze_movies.py for movie library comparison and analysis.
- Implement compare_series.py and analyze_series.py for TV series library comparison and analysis.
- Add configuration example in config.example.txt.
- Create README.md with project overview, setup instructions, and usage examples.
- Add LICENSE file for MIT License.
- Include flake.nix and flake.lock for Nix-based development environment.
- Add USAGE.md for quick start guide and common commands.
2025-12-05 01:57:15 -06:00

230 lines
8.4 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Analyze movies missing from Plex to identify common patterns.
"""
import json
from pathlib import Path
from collections import defaultdict
import re
# Patterns are compiled once here instead of being re-evaluated per movie.
_SPECIAL_CHAR_RE = re.compile(r'[^a-zA-Z0-9\s\-_\.\(\)\[\]\{\}]')
_YEAR_RE = re.compile(r'\((\d{4})\)')
_CURLY_TAG_RE = re.compile(r'\{[^}]+\}')
_SQUARE_TAG_RE = re.compile(r'\[[^\]]+\]')
_QUALITY_RE = re.compile(r'(1080p|2160p|720p|4K)', re.I)


def _scan_missing_movies(missing, recent_year):
    """Tally filename and path attributes for every entry of *missing*.

    Returns a dict holding the counters (``extensions``, ``naming_patterns``,
    ``directory_names``, ``path_depth``) and the match lists
    (``has_special_chars``, ``has_edition_tag``, ``has_imdb_tag``,
    ``recently_released``) consumed by ``analyze_missing_movies``.
    """
    stats = {
        'extensions': defaultdict(int),
        'naming_patterns': defaultdict(int),
        'directory_names': defaultdict(int),
        'path_depth': defaultdict(int),
        'has_special_chars': [],
        'has_edition_tag': [],
        'has_imdb_tag': [],
        'recently_released': [],
    }
    naming_patterns = stats['naming_patterns']

    for title, info in missing.items():
        path = Path(info['path'])
        filename = path.name
        lowered = filename.lower()

        stats['extensions'][path.suffix.lower()] += 1
        stats['directory_names'][path.parent.name] += 1

        if _SPECIAL_CHAR_RE.search(filename):
            stats['has_special_chars'].append((title, filename))
        if '{edition-' in lowered:
            stats['has_edition_tag'].append((title, filename))
        if '{imdb-' in lowered:
            stats['has_imdb_tag'].append((title, filename))

        # Depth is the number of path components after the 'Movies' component;
        # paths that do not contain such a component are left out of the tally.
        parts = path.parts
        if 'Movies' in parts:
            stats['path_depth'][len(parts) - parts.index('Movies') - 1] += 1

        year_match = _YEAR_RE.search(filename)
        if year_match:
            year = int(year_match.group(1))
            if year >= recent_year:
                stats['recently_released'].append((title, year, filename))

        # The increment order is kept stable because it decides how ties are
        # ordered when the pattern counts are sorted for display.
        if _CURLY_TAG_RE.search(filename):
            naming_patterns['has_curly_braces'] += 1
        if _SQUARE_TAG_RE.search(filename):
            naming_patterns['has_square_brackets'] += 1
        if year_match:
            naming_patterns['has_year'] += 1
        if _QUALITY_RE.search(filename):
            naming_patterns['has_quality'] += 1

    return stats


def _build_recommendations(stats, total, unusual_dirs, all_in_jellyfin, recent_year):
    """Return the list of recommendation texts that apply to the scanned stats."""
    recommendations = []
    has_edition_tag = stats['has_edition_tag']
    recently_released = stats['recently_released']

    if stats['naming_patterns'].get('has_curly_braces', 0) > total * 0.5:
        recommendations.append(
            "• Many files use curly braces {} in filenames\n"
            " Plex might have trouble with certain special characters in names.\n"
            " Consider: Plex 'Plex Dance' or manual metadata matching for these."
        )
    if has_edition_tag:
        recommendations.append(
            f"{len(has_edition_tag)} movies have {{edition-...}} tags\n"
            " Plex may not recognize edition tags in curly braces.\n"
            " Consider: Use Plex's edition support or move tags to square brackets."
        )
    if unusual_dirs:
        recommendations.append(
            "• Some movies are in directories with unusual names (empty, very short)\n"
            " Plex scanner might skip these.\n"
            " Consider: Rename directories to proper movie names."
        )
    if recently_released:
        recommendations.append(
            f"{len(recently_released)} movies are from {recent_year} or later\n"
            " These might need manual metadata matching if too new for databases."
        )
    if all_in_jellyfin:
        recommendations.append(
            "• ALL missing movies are visible in Jellyfin\n"
            " This strongly suggests the issue is with Plex's scanner/matcher, not file access.\n"
            " Actions:\n"
            " 1. Force a full library refresh in Plex\n"
            " 2. Check Plex's 'Trash' for unmatched items\n"
            " 3. Try 'Scan Library Files' then 'Refresh All Metadata'\n"
            " 4. Check Plex server logs for scanner errors"
        )
    return recommendations


def analyze_missing_movies(report_file='movies_comparison_report.json', *, recent_year=2023):
    """Print a pattern analysis of the movies a report lists as missing from Plex.

    Args:
        report_file: Path to a UTF-8 JSON report. Its ``missing_from_plex``
            key maps each movie title to a dict with a ``path`` string and
            an ``in_jellyfin`` flag (a missing flag is treated as false).
        recent_year: Movies whose filename carries a ``(YYYY)`` year at or
            after this value are listed as recent releases.

    Returns:
        None. All results are written to standard output.

    Raises:
        OSError: If the report file cannot be opened.
        json.JSONDecodeError: If the report is not valid JSON.
        KeyError: If an entry has no ``path`` key.
    """
    # Explicit encoding so non-ASCII titles/paths do not depend on the locale.
    with open(report_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    missing = data.get('missing_from_plex', {})
    if not missing:
        print("No missing movies found!")
        return

    total = len(missing)
    print(f"Analyzing {total} movies missing from Plex...\n")

    stats = _scan_missing_movies(missing, recent_year)
    has_edition_tag = stats['has_edition_tag']
    has_imdb_tag = stats['has_imdb_tag']
    has_special_chars = stats['has_special_chars']
    recently_released = stats['recently_released']
    directory_names = stats['directory_names']

    print("=" * 80)
    print("ANALYSIS RESULTS")
    print("=" * 80)

    print("\n📊 FILE EXTENSIONS:")
    for ext, count in sorted(stats['extensions'].items(), key=lambda x: x[1], reverse=True):
        pct = (count / total) * 100
        print(f" {ext or 'no extension':15} {count:4} ({pct:.1f}%)")

    print("\n🔤 NAMING PATTERNS:")
    for pattern, count in sorted(stats['naming_patterns'].items(), key=lambda x: x[1], reverse=True):
        pct = (count / total) * 100
        print(f" {pattern:30} {count:4} ({pct:.1f}%)")

    if has_edition_tag:
        print(f"\n🏷️ EDITION TAGS: {len(has_edition_tag)}")
        print(" Movies with {edition-...} tags")
        for title, _filename in has_edition_tag[:5]:
            print(f"{title[:60]}")
        if len(has_edition_tag) > 5:
            print(f" ... and {len(has_edition_tag) - 5} more")

    if has_imdb_tag:
        print(f"\n🎬 IMDB TAGS: {len(has_imdb_tag)}")
        print(" Movies with {imdb-...} tags")

    print("\n📁 PATH DEPTH FROM 'Movies' FOLDER:")
    for depth, count in sorted(stats['path_depth'].items()):
        pct = (count / total) * 100
        print(f" Depth {depth}: {count:4} ({pct:.1f}%)")

    if recently_released:
        print(f"\n📅 RECENT RELEASES ({recent_year}+): {len(recently_released)}")
        recent_sorted = sorted(recently_released, key=lambda x: x[1], reverse=True)
        for title, year, _filename in recent_sorted[:10]:
            print(f" {year} - {title[:60]}")
        if len(recently_released) > 10:
            print(f" ... and {len(recently_released) - 10} more")

    if has_special_chars:
        print(f"\n⚠️ SPECIAL CHARACTERS: {len(has_special_chars)}")
        special_chars_found = set()
        for _title, filename in has_special_chars:
            special_chars_found.update(_SPECIAL_CHAR_RE.findall(filename))
        print(f" Characters found: {', '.join(repr(c) for c in sorted(special_chars_found))}")
        print(" Sample files:")
        for _title, filename in has_special_chars[:5]:
            print(f"{filename[:75]}")

    # Directories named with an empty "()" pair, blank, or shorter than three
    # characters are reported as unusual.
    print("\n📂 UNUSUAL DIRECTORY PATTERNS:")
    unusual_dirs = [
        (dirname, count)
        for dirname, count in directory_names.items()
        if '()' in dirname or dirname.strip() == '' or len(dirname) < 3
    ]
    if unusual_dirs:
        for dirname, count in unusual_dirs[:10]:
            print(f" '{dirname}': {count} movies")
    else:
        print(" None found")

    print("\n📁 DIRECTORIES WITH MOST MISSING MOVIES:")
    top_dirs = sorted(directory_names.items(), key=lambda x: x[1], reverse=True)[:10]
    for dirname, count in top_dirs:
        if count > 1:
            print(f" {dirname}: {count} movies")

    # A missing 'in_jellyfin' flag counts as "not in Jellyfin" rather than
    # aborting the whole report.
    in_jellyfin_count = sum(1 for info in missing.values() if info.get('in_jellyfin'))
    all_in_jellyfin = in_jellyfin_count == total

    print("\n🎥 JELLYFIN STATUS:")
    print(f" Movies also in Jellyfin: {in_jellyfin_count}/{total} ({(in_jellyfin_count / total * 100):.1f}%)")
    if all_in_jellyfin:
        print(" ✓ ALL missing movies are visible in Jellyfin")
        print(" → This suggests a Plex scanning/indexing issue, not file access")
    else:
        # Also covers the case where none of the movies are in Jellyfin, which
        # is when the filesystem/permission hint is most relevant.
        print(f"{total - in_jellyfin_count} movies not in Jellyfin either")
        print(" → These might have filesystem/permission issues")

    print("\n" + "=" * 80)
    print("RECOMMENDATIONS:")
    print("=" * 80)

    recommendations = _build_recommendations(
        stats, total, unusual_dirs, all_in_jellyfin, recent_year
    )
    for i, rec in enumerate(recommendations, 1):
        print(f"\n{i}. {rec}")
    if not recommendations:
        print("\n• Files appear normal. Try forcing a Plex library refresh.")

    print("\n" + "=" * 80)
# Script entry point: analyze the default report file when run directly.
if __name__ == "__main__":
    analyze_missing_movies()