From e772af13a78521ff345fe94d886036a98e95b053 Mon Sep 17 00:00:00 2001 From: Danilo Reyes Date: Fri, 5 Dec 2025 01:57:15 -0600 Subject: [PATCH] Add initial project files for Jellyfin-Plex Library Checker - Create .editorconfig for consistent coding styles. - Add .envrc for direnv integration. - Include .gitignore to exclude environment and build files. - Implement compare_movies.py and analyze_movies.py for movie library comparison and analysis. - Implement compare_series.py and analyze_series.py for TV series library comparison and analysis. - Add configuration example in config.example.txt. - Create README.md with project overview, setup instructions, and usage examples. - Add LICENSE file for MIT License. - Include flake.nix and flake.lock for Nix-based development environment. - Add USAGE.md for quick start guide and common commands. --- .editorconfig | 23 ++ .envrc | 1 + .gitignore | 34 +++ LICENSE | 22 ++ README.md | 219 ++++++++++++++++++ USAGE.md | 67 ++++++ analyze_movies.py | 229 +++++++++++++++++++ analyze_series.py | 216 ++++++++++++++++++ compare_movies.py | 542 +++++++++++++++++++++++++++++++++++++++++++++ compare_series.py | 539 ++++++++++++++++++++++++++++++++++++++++++++ config.example.txt | 22 ++ flake.lock | 61 +++++ flake.nix | 97 ++++++++ 13 files changed, 2072 insertions(+) create mode 100644 .editorconfig create mode 100644 .envrc create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 USAGE.md create mode 100755 analyze_movies.py create mode 100755 analyze_series.py create mode 100755 compare_movies.py create mode 100755 compare_series.py create mode 100644 config.example.txt create mode 100644 flake.lock create mode 100644 flake.nix diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..04f54ea --- /dev/null +++ b/.editorconfig @@ -0,0 +1,23 @@ +# EditorConfig helps maintain consistent coding styles +root = true + +[*] +charset = utf-8 +end_of_line = lf 
+insert_final_newline = true +trim_trailing_whitespace = true + +[*.py] +indent_style = space +indent_size = 4 + +[*.{yml,yaml,json}] +indent_style = space +indent_size = 2 + +[*.md] +trim_trailing_whitespace = false + +[*.nix] +indent_style = space +indent_size = 2 diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..8392d15 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5d60b87 --- /dev/null +++ b/.gitignore @@ -0,0 +1,34 @@ +# Environment and secrets +.env +.env.local + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +venv/ +env/ +ENV/ +*.egg-info/ +dist/ +build/ + +# Reports (generated by scripts) +*_comparison_report.json + +# Direnv +.direnv/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +Thumbs.db + diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9920c99 --- /dev/null +++ b/LICENSE @@ -0,0 +1,22 @@ +MIT License + +Copyright (c) 2024 jawz + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + diff --git a/README.md b/README.md new file mode 100644 index 0000000..acc0dfe --- /dev/null +++ b/README.md @@ -0,0 +1,219 @@ +# Jellyfin-Plex Library Checker + +A comprehensive tool to compare and analyze media libraries between Plex, Jellyfin, and your filesystem. Identifies missing content, mismatches, and provides detailed analysis to help maintain consistency across your media servers. + +## Features + +- 🎬 **Movie Library Comparison** - Compare movies across Plex, Jellyfin, and filesystem +- 📺 **TV Series Library Comparison** - Compare TV shows with episode counts +- 🔍 **Detailed Analysis** - Identify patterns in missing content (codecs, resolution, naming, etc.) +- 🚫 **.plexignore Detection** - Automatically checks for exclusion files +- 📊 **Comprehensive Reports** - JSON and human-readable reports +- 🔧 **Multiple Path Support** - Handle libraries across multiple directories +- 🎯 **Smart Matching** - Aggressive normalization handles title variations, years, tags, and language differences + +## The Problem This Solves + +When running both Plex and Jellyfin on the same media library, you might notice: +- Some movies show in Jellyfin but not in Plex +- Inconsistent episode counts between servers +- Difficulty tracking down which files aren't being scanned + +This tool helps you quickly identify and diagnose these issues by: +- Comparing what each server sees vs. 
what's actually on disk +- Analyzing codec, resolution, and metadata issues +- Detecting `.plexignore` files or other exclusions +- Finding orphaned entries in your databases + +## Setup + +### Option 1: Using Nix (Recommended) + +If you have Nix with Flakes enabled: + +```bash +nix develop +``` + +This will automatically: +- Set up a Python environment +- Create a virtual environment +- Install all dependencies from `requirements.txt` + +### Option 2: Using pip + +1. Install dependencies: +```bash +pip install -r requirements.txt +``` + +### Configuration + +Create a `.env` file in the project root (use `config.example.txt` as a template): + +```bash +# Plex Configuration +PLEX_URL=http://localhost:32400 +PLEX_TOKEN=your_plex_token_here + +# Jellyfin Configuration +JELLYFIN_URL=http://localhost:8096 +JELLYFIN_API_KEY=your_jellyfin_api_key_here +JELLYFIN_USER_ID=your_jellyfin_user_id_here + +# Filesystem Configuration +MOVIES_PATH=/path/to/your/movies/directory +SERIES_PATH=/path/to/your/tv/series/directory +# Optional: Additional series paths +SERIES_PATH_2=/path/to/backup/series +``` + +## Usage + +### For Movies + +**1. Compare movie libraries:** +```bash +python compare_movies.py +# Or with Nix: +nix run .#compare-movies +``` + +**2. Analyze missing movies:** +```bash +python analyze_movies.py +# Or with Nix: +nix run .#analyze-movies +``` + +The movie analyzer will report: +- File extensions and naming patterns +- Low resolution or unusual codecs +- Missing color metadata +- `.plexignore` file detection +- Recommendations for fixing issues + +### For TV Series + +**1. Compare series libraries:** +```bash +python compare_series.py +# Or with Nix: +nix run .#compare-series +``` + +**2. 
Analyze missing series:** +```bash +python analyze_series.py +# Or with Nix: +nix run .#analyze-series +``` + +The series analyzer will report: +- Episode counts (including empty directories) +- Naming patterns and special characters +- Title mismatches between servers +- `.plexignore` file detection + +## Getting API Credentials + +### Plex Token +1. Open Plex Web App in your browser +2. Open Developer Tools (F12) +3. Go to Network tab +4. Click on any request to your Plex server +5. Look for `X-Plex-Token` in the request headers +6. Copy that token value + +### Jellyfin API Key +1. Open Jellyfin Dashboard +2. Go to Dashboard → API Keys +3. Create a new API key +4. Copy the API key + +### Jellyfin User ID +1. Open Jellyfin Dashboard +2. Go to Dashboard → Users +3. Click on your user account +4. Look at the URL - it will contain something like `/Users/{userId}` +5. Copy the UUID part (format: `a1b2c3d4-5678-90ab-cdef-1234567890ab`) + +## How It Works + +### Matching Logic + +The tool uses multiple strategies to match content across systems: + +1. **Title Normalization** - Removes punctuation, years, articles, and special characters +2. **Path Matching** - Compares filesystem paths to handle title variations +3. **Flexible Comparison** - Handles: + - Different romanizations (e.g., "Haikyu!!" vs "Haikyuu!!") + - Language variations (e.g., "La familia P. 
Luche" vs "The Plush Family") + - Tag removal (e.g., `{imdb-tt123456}`, `[tvdbid-12345]`) + - Year differences in titles + +### Output Files + +- `movies_comparison_report.json` - Detailed movie comparison data +- `series_comparison_report.json` - Detailed TV series comparison data + +## Common Issues Found + +### Movies +- **Old codecs** (mpeg2, mpeg4/XviD) - Plex may skip these +- **Low resolution** (< 720p) - May be filtered as samples +- **Missing/invalid metadata** - Causes scanner issues +- **`.plexignore` exclusions** - Files explicitly ignored + +### TV Series +- **Empty directories** (0 episodes) - Plex correctly skips these +- **Title mismatches** - Different names between servers +- **Multiple library paths** - Content spread across locations + +## Example Output + +``` +📊 SUMMARY: + Movies missing from Plex (found in filesystem): 12 + Movies missing from Plex (found in Jellyfin): 12 + Movies in Plex but not in filesystem: 0 + +⚠️ Found .plexignore at: /srv/pool/multimedia/media/Movies/.plexignore + → Check if these movies are listed in it! + +🎯 LIKELY CAUSES: + • ALL missing movies are visible in Jellyfin + • Low resolution files (< 720p) might be filtered out by Plex + • Mixed SD/HD color metadata confuses Plex's codec detection + • Unusual/old codecs (mpeg2video, mpeg4) not well supported +``` + +## Troubleshooting + +### "No movies found" +- Verify `MOVIES_PATH` is correct and accessible +- Check file permissions + +### "Authentication failed" +- Verify your Plex token is correct +- Check Jellyfin API key and User ID + +### "Series reported as missing but I can see them in Plex" +- This is usually due to title variations +- The tool uses path matching as a fallback +- Check the JSON report for path details + +## Contributing + +Issues and pull requests are welcome! This tool was created to solve real media server management problems. + +## License + +MIT License - feel free to use and modify as needed. 
+ +## Acknowledgments + +Built with: +- [plexapi](https://github.com/pkkid/python-plexapi) - Plex API client +- [requests](https://docs.python-requests.org/) - HTTP library +- [python-dotenv](https://github.com/theskumar/python-dotenv) - Environment management diff --git a/USAGE.md b/USAGE.md new file mode 100644 index 0000000..2a4ca6b --- /dev/null +++ b/USAGE.md @@ -0,0 +1,67 @@ +# Quick Start Guide + +## First Time Setup + +1. **Configure your environment:** + ```bash + cp config.example.txt .env + # Edit .env with your actual credentials and paths + ``` + +2. **Enter development environment (Nix users):** + ```bash + nix develop + ``` + + **Or install dependencies (pip users):** + ```bash + pip install -r requirements.txt + ``` + +## Quick Commands + +### Movies +```bash +# Compare movie libraries +python compare_movies.py + +# Analyze what's missing +python analyze_movies.py +``` + +### TV Series +```bash +# Compare series libraries +python compare_series.py + +# Analyze what's missing +python analyze_series.py +``` + +## With Nix Flakes +```bash +nix run .#compare-movies +nix run .#analyze-movies +nix run .#compare-series +nix run .#analyze-series +``` + +## Output Files + +- `movies_comparison_report.json` - Detailed movie data +- `series_comparison_report.json` - Detailed series data + +## First Steps After Running + +1. Check for `.plexignore` files (the scripts will warn you) +2. Review the analysis output for patterns +3. Force a Plex library scan if needed +4. 
def analyze_missing_movies(report_file: str = 'movies_comparison_report.json') -> None:
    """Print a pattern analysis of the movies a comparison report lists as missing from Plex.

    The JSON report's ``missing_from_plex`` mapping (title -> info dict) is
    scanned for file extensions, naming patterns, tags, path depth, recent
    release years, special characters, unusual directory names and Jellyfin
    visibility. A list of recommendations is printed at the end.

    Args:
        report_file: Path of the JSON report to read. Each entry is read for a
            ``path`` string and an ``in_jellyfin`` flag; an entry missing
            either is treated as having an empty path / not being in Jellyfin
            instead of aborting the whole analysis with ``KeyError``.

    Returns:
        None. Everything is written to stdout.

    Raises:
        OSError: If ``report_file`` cannot be opened.
        json.JSONDecodeError: If the report is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding.
    with open(report_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    missing = data.get('missing_from_plex', {})

    if not missing:
        print("No missing movies found!")
        return

    total = len(missing)
    print(f"Analyzing {total} movies missing from Plex...\n")

    # Compiled once; both patterns are applied to every filename.
    special_char_re = re.compile(r'[^a-zA-Z0-9\s\-_\.\(\)\[\]\{\}]')
    year_re = re.compile(r'\((\d{4})\)')

    extensions = defaultdict(int)
    has_special_chars = []
    has_edition_tag = []
    has_imdb_tag = []
    recently_released = []
    directory_names = defaultdict(int)
    path_depth = defaultdict(int)
    naming_patterns = defaultdict(int)

    for title, info in missing.items():
        path = Path(info.get('path') or '')
        filename = path.name
        lowered = filename.lower()

        # File extension
        extensions[path.suffix.lower()] += 1

        # Anything outside letters, digits, whitespace and common punctuation
        if special_char_re.search(filename):
            has_special_chars.append((title, filename))

        # Edition / IMDB tags
        if '{edition-' in lowered:
            has_edition_tag.append((title, filename))
        if '{imdb-' in lowered:
            has_imdb_tag.append((title, filename))

        # Directory structure
        directory_names[path.parent.name] += 1

        # Depth below the first path component literally named 'Movies';
        # paths without such a component are left out of the depth table.
        parts = path.parts
        if 'Movies' in parts:
            path_depth[len(parts) - parts.index('Movies') - 1] += 1

        # Year extraction and recent releases
        year_match = year_re.search(filename)
        if year_match:
            year = int(year_match.group(1))
            if year >= 2023:
                recently_released.append((title, year, filename))

        # Naming pattern analysis, e.g. "Movie (Year) {tags} - [quality]".
        # Keys are touched in a fixed order so ties sort the same way each run.
        if re.search(r'\{[^}]+\}', filename):
            naming_patterns['has_curly_braces'] += 1
        if re.search(r'\[[^\]]+\]', filename):
            naming_patterns['has_square_brackets'] += 1
        if year_match:
            naming_patterns['has_year'] += 1
        if re.search(r'(1080p|2160p|720p|4K)', filename, re.I):
            naming_patterns['has_quality'] += 1

    # Print analysis
    print("="*80)
    print("ANALYSIS RESULTS")
    print("="*80)

    print(f"\n📊 FILE EXTENSIONS:")
    for ext, count in sorted(extensions.items(), key=lambda x: x[1], reverse=True):
        pct = (count / total) * 100
        print(f" {ext or 'no extension':15} {count:4} ({pct:.1f}%)")

    print(f"\n🔤 NAMING PATTERNS:")
    for pattern, count in sorted(naming_patterns.items(), key=lambda x: x[1], reverse=True):
        pct = (count / total) * 100
        print(f" {pattern:30} {count:4} ({pct:.1f}%)")

    if has_edition_tag:
        print(f"\n🏷️ EDITION TAGS: {len(has_edition_tag)}")
        print(f" Movies with {{edition-...}} tags")
        for title, filename in has_edition_tag[:5]:
            print(f" • {title[:60]}")
        if len(has_edition_tag) > 5:
            print(f" ... and {len(has_edition_tag) - 5} more")

    if has_imdb_tag:
        print(f"\n🎬 IMDB TAGS: {len(has_imdb_tag)}")
        print(f" Movies with {{imdb-...}} tags")

    print(f"\n📁 PATH DEPTH FROM 'Movies' FOLDER:")
    for depth, count in sorted(path_depth.items()):
        pct = (count / total) * 100
        print(f" Depth {depth}: {count:4} ({pct:.1f}%)")

    if recently_released:
        print(f"\n📅 RECENT RELEASES (2023+): {len(recently_released)}")
        recent_sorted = sorted(recently_released, key=lambda x: x[1], reverse=True)
        for title, year, filename in recent_sorted[:10]:
            print(f" {year} - {title[:60]}")
        if len(recently_released) > 10:
            print(f" ... and {len(recently_released) - 10} more")

    if has_special_chars:
        print(f"\n⚠️ SPECIAL CHARACTERS: {len(has_special_chars)}")
        special_chars_found = set()
        for title, filename in has_special_chars:
            special_chars_found.update(special_char_re.findall(filename))
        print(f" Characters found: {', '.join(repr(c) for c in sorted(special_chars_found))}")
        print(f" Sample files:")
        for title, filename in has_special_chars[:5]:
            print(f" • {filename[:75]}")

    # Check for empty/weird directories
    print(f"\n📂 UNUSUAL DIRECTORY PATTERNS:")
    unusual_dirs = [
        (dirname, count)
        for dirname, count in directory_names.items()
        if '()' in dirname or dirname.strip() == '' or len(dirname) < 3
    ]

    if unusual_dirs:
        for dirname, count in unusual_dirs[:10]:
            print(f" '{dirname}': {count} movies")
    else:
        print(" None found")

    # Find common directory patterns
    print(f"\n📁 DIRECTORIES WITH MOST MISSING MOVIES:")
    top_dirs = sorted(directory_names.items(), key=lambda x: x[1], reverse=True)[:10]
    for dirname, count in top_dirs:
        if count > 1:
            print(f" {dirname}: {count} movies")

    # Jellyfin visibility; a missing flag counts as "not in Jellyfin".
    in_jellyfin_count = sum(1 for info in missing.values() if info.get('in_jellyfin'))
    all_in_jellyfin = in_jellyfin_count == total

    print(f"\n🎥 JELLYFIN STATUS:")
    print(f" Movies also in Jellyfin: {in_jellyfin_count}/{total} ({(in_jellyfin_count/total*100):.1f}%)")

    if all_in_jellyfin:
        print(" ✓ ALL missing movies are visible in Jellyfin")
        print(" → This suggests a Plex scanning/indexing issue, not file access")
    elif in_jellyfin_count > 0:
        print(f" ⚠ {total - in_jellyfin_count} movies not in Jellyfin either")
        print(" → These might have filesystem/permission issues")

    print("\n" + "="*80)
    print("RECOMMENDATIONS:")
    print("="*80)

    recommendations = []

    # NOTE(review): Plex's own naming guide documents `{edition-...}` and
    # `{imdb-...}` in curly braces as supported syntax, so the two
    # recommendations below may point users the wrong way - confirm the advice.
    if naming_patterns['has_curly_braces'] > total * 0.5:
        recommendations.append(
            "• Many files use curly braces {} in filenames\n"
            " Plex might have trouble with certain special characters in names.\n"
            " Consider: Plex 'Plex Dance' or manual metadata matching for these."
        )

    if has_edition_tag:
        recommendations.append(
            f"• {len(has_edition_tag)} movies have {{edition-...}} tags\n"
            " Plex may not recognize edition tags in curly braces.\n"
            " Consider: Use Plex's edition support or move tags to square brackets."
        )

    if unusual_dirs:
        recommendations.append(
            "• Some movies are in directories with unusual names (empty, very short)\n"
            " Plex scanner might skip these.\n"
            " Consider: Rename directories to proper movie names."
        )

    if recently_released:
        recommendations.append(
            f"• {len(recently_released)} movies are from 2023 or later\n"
            " These might need manual metadata matching if too new for databases."
        )

    if all_in_jellyfin:
        recommendations.append(
            "• ALL missing movies are visible in Jellyfin\n"
            " This strongly suggests the issue is with Plex's scanner/matcher, not file access.\n"
            " Actions:\n"
            " 1. Force a full library refresh in Plex\n"
            " 2. Check Plex's 'Trash' for unmatched items\n"
            " 3. Try 'Scan Library Files' then 'Refresh All Metadata'\n"
            " 4. Check Plex server logs for scanner errors"
        )

    for i, rec in enumerate(recommendations, 1):
        print(f"\n{i}. {rec}")

    if not recommendations:
        print("\n• Files appear normal. Try forcing a Plex library refresh.")

    print("\n" + "="*80)
def analyze_missing_series(report_file: str = 'series_comparison_report.json') -> None:
    """Print a pattern analysis of the TV series a comparison report lists as missing from Plex.

    The JSON report's ``missing_from_plex`` mapping (title -> info dict) is
    scanned for episode counts, naming patterns, tags, recent years, special
    characters and Jellyfin visibility. The parent directory of every series
    path is also probed on disk for a ``.plexignore`` file. Recommendations
    and a complete list of the missing series are printed at the end.

    Args:
        report_file: Path of the JSON report to read. Each entry is read for a
            ``path`` string, an optional ``episode_count`` and an
            ``in_jellyfin`` flag; an entry missing ``path`` or ``in_jellyfin``
            is treated as having an empty path / not being in Jellyfin instead
            of aborting with ``KeyError``.

    Returns:
        None. Everything is written to stdout.

    Raises:
        OSError: If ``report_file`` cannot be opened.
        json.JSONDecodeError: If the report is not valid JSON.
    """
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default encoding.
    with open(report_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    missing = data.get('missing_from_plex', {})

    if not missing:
        print("No missing TV series found!")
        return

    total = len(missing)
    print(f"Analyzing {total} TV series missing from Plex...\n")

    # Compiled once; both patterns are applied to every directory name.
    special_char_re = re.compile(r'[^a-zA-Z0-9\s\-_\.\(\)\[\]\{\}]')
    year_re = re.compile(r'\((\d{4})\)')

    has_special_chars = []
    has_edition_tag = []
    has_imdb_tag = []
    has_tvdb_tag = []
    recently_released = []
    directory_patterns = defaultdict(int)
    episode_counts = []
    parent_dirs = set()

    for title, info in missing.items():
        raw_path = info.get('path')
        path = Path(raw_path or '')
        dirname = path.name
        lowered = dirname.lower()

        # Only real paths contribute a directory to probe for .plexignore;
        # an entry without a path must not make us probe the working directory.
        if raw_path:
            parent_dirs.add(path.parent)

        episode_counts.append(info.get('episode_count') or 0)

        # Anything outside letters, digits, whitespace and common punctuation
        if special_char_re.search(dirname):
            has_special_chars.append((title, dirname))

        # Edition / IMDB / TVDB tags
        if '{edition-' in lowered:
            has_edition_tag.append((title, dirname))
        if '{imdb-' in lowered:
            has_imdb_tag.append((title, dirname))
        if '{tvdb-' in lowered:
            has_tvdb_tag.append((title, dirname))

        # Year extraction; also the first of the naming-pattern counters
        year_match = year_re.search(dirname)
        if year_match:
            year = int(year_match.group(1))
            if year >= 2020:
                recently_released.append((title, year, dirname))
            directory_patterns['has_year'] += 1

        # Remaining naming patterns, e.g. "Show (Year)" or "Show Name - [Quality]"
        if re.search(r'\{[^}]+\}', dirname):
            directory_patterns['has_curly_braces'] += 1
        if re.search(r'\[[^\]]+\]', dirname):
            directory_patterns['has_square_brackets'] += 1

    # Computed once; used by both the episode summary and the recommendations.
    no_episodes = sum(1 for c in episode_counts if c == 0)

    # Print analysis
    print("="*80)
    print("ANALYSIS RESULTS")
    print("="*80)

    print(f"\n📊 EPISODE COUNTS:")
    print(f" Total episodes: {sum(episode_counts)}")
    print(f" Average per series: {sum(episode_counts) / len(episode_counts):.1f}")
    print(f" Min: {min(episode_counts)}, Max: {max(episode_counts)}")
    if no_episodes > 0:
        print(f" ⚠️ Series with 0 episodes: {no_episodes}")

    print(f"\n🔤 NAMING PATTERNS:")
    for pattern, count in sorted(directory_patterns.items(), key=lambda x: x[1], reverse=True):
        pct = (count / total) * 100
        print(f" {pattern:30} {count:4} ({pct:.1f}%)")

    if has_imdb_tag:
        print(f"\n🎬 IMDB TAGS: {len(has_imdb_tag)}")
        print(f" Series with {{imdb-...}} tags")

    if has_tvdb_tag:
        print(f"\n📺 TVDB TAGS: {len(has_tvdb_tag)}")
        print(f" Series with {{tvdb-...}} tags")

    if has_edition_tag:
        print(f"\n🏷️ EDITION TAGS: {len(has_edition_tag)}")
        print(f" Series with {{edition-...}} tags")

    if recently_released:
        print(f"\n📅 RECENT SERIES (2020+): {len(recently_released)}")
        recent_sorted = sorted(recently_released, key=lambda x: x[1], reverse=True)
        for title, year, dirname in recent_sorted[:10]:
            print(f" {year} - {title[:60]}")
        if len(recently_released) > 10:
            print(f" ... and {len(recently_released) - 10} more")

    if has_special_chars:
        print(f"\n⚠️ SPECIAL CHARACTERS: {len(has_special_chars)}")
        special_chars_found = set()
        for title, dirname in has_special_chars:
            special_chars_found.update(special_char_re.findall(dirname))
        print(f" Characters found: {', '.join(repr(c) for c in sorted(special_chars_found))}")
        print(f" Sample directories:")
        for title, dirname in has_special_chars[:5]:
            print(f" • {dirname[:75]}")

    # Jellyfin visibility; a missing flag counts as "not in Jellyfin".
    in_jellyfin_count = sum(1 for info in missing.values() if info.get('in_jellyfin'))

    print(f"\n📺 JELLYFIN STATUS:")
    print(f" Series also in Jellyfin: {in_jellyfin_count}/{total} ({(in_jellyfin_count/total*100):.1f}%)")

    if in_jellyfin_count == total:
        print(" ✓ ALL missing series are visible in Jellyfin")
        print(" → This suggests a Plex scanning/indexing issue")
    elif in_jellyfin_count > 0:
        print(f" ⚠ {total - in_jellyfin_count} series not in Jellyfin either")
        print(" → These might have filesystem/permission issues")

    # Check for .plexignore in every unique parent directory. Every hit is
    # remembered so the recommendations below reflect all of them, not just
    # whichever directory happened to be visited last.
    print(f"\n🔍 CHECK FOR .PLEXIGNORE:")
    found_plexignore = []
    for parent_dir in sorted(parent_dirs):  # sorted for reproducible output
        candidate = parent_dir / '.plexignore'
        try:
            present = candidate.exists()
        except OSError:
            # An unreadable directory is treated as "nothing found here".
            present = False
        if present:
            print(f" ⚠️ Found .plexignore at: {candidate}")
            print(f" → Check if these series are listed in it!")
            found_plexignore.append(candidate)

    if not found_plexignore:
        print(f" No .plexignore files found in series directories")

    print("\n" + "="*80)
    print("RECOMMENDATIONS:")
    print("="*80)

    recommendations = []

    # .plexignore comes first: it is the most direct explanation.
    if found_plexignore:
        locations = ', '.join(str(p) for p in found_plexignore)
        recommendations.append(
            "• .plexignore file detected!\n"
            f" Check: {locations}\n"
            " These series might be explicitly excluded."
        )

    if in_jellyfin_count == total:
        recommendations.append(
            "• ALL missing series are visible in Jellyfin\n"
            " Actions:\n"
            " 1. Force a full library refresh in Plex\n"
            " 2. Check Plex's TV Shows library scanner settings\n"
            " 3. Verify series naming follows Plex conventions\n"
            " 4. Check Plex server logs for scanner errors"
        )

    if no_episodes > 0:
        recommendations.append(
            f"• {no_episodes} series have 0 episodes detected\n"
            " These directories might be empty or improperly structured.\n"
            " Plex requires proper season/episode folder structure."
        )

    if has_special_chars:
        recommendations.append(
            f"• {len(has_special_chars)} series have special characters in names\n"
            " Some characters might cause issues with Plex scanner."
        )

    for i, rec in enumerate(recommendations, 1):
        print(f"\n{i}. {rec}")

    if not recommendations:
        print("\n• Series appear normal. Try forcing a Plex library refresh.")

    print("\n" + "="*80)

    # List all missing series
    print("\n📋 COMPLETE LIST OF MISSING SERIES:")
    print("="*80)
    for i, (title, info) in enumerate(sorted(missing.items()), 1):
        jf_status = "✓" if info.get('in_jellyfin') else "✗"
        ep_count = info.get('episode_count', 0)
        print(f"{i:3}. [{jf_status}] {title} ({ep_count} episodes)")
    print("="*80)
+""" + +import os +import sys +from pathlib import Path +from typing import Set, Dict, List, Tuple +from collections import defaultdict +import json + +try: + from plexapi.server import PlexServer + from plexapi.exceptions import NotFound, Unauthorized +except ImportError: + print("Error: plexapi not installed. Run: pip install -r requirements.txt") + sys.exit(1) + +import requests +from requests.exceptions import RequestException +from dotenv import load_dotenv + + +class MovieLibraryComparator: + def __init__(self, plex_url: str, plex_token: str, + jellyfin_url: str, jellyfin_api_key: str, jellyfin_user_id: str, + movies_path: str): + self.plex_url = plex_url + self.plex_token = plex_token + self.jellyfin_url = jellyfin_url.rstrip('/') + self.jellyfin_api_key = jellyfin_api_key + self.jellyfin_user_id = jellyfin_user_id + self.movies_path = Path(movies_path) + # Normalize the movies path for filtering + self.movies_path_normalized = self.normalize_path(str(self.movies_path)) + + def get_filesystem_movies(self) -> Dict[str, Dict]: + """Scan filesystem for movie files and return normalized title -> path mapping.""" + movies = {} + video_extensions = {'.mkv', '.avi', '.mp4', '.m4v', '.mov', '.wmv', '.flv', '.webm'} + + print(f"Scanning filesystem at: {self.movies_path}") + + if not self.movies_path.exists(): + print(f"Warning: Movies path does not exist: {self.movies_path}") + return movies + + for video_file in self.movies_path.rglob('*'): + if video_file.is_file() and video_file.suffix.lower() in video_extensions: + # Use filename without extension as key + title = video_file.stem + movies[title] = { + 'path': video_file, + 'full_path': str(video_file), + 'filename': video_file.name, + 'stem': video_file.stem + } + + print(f"Found {len(movies)} video files in filesystem") + return movies + + def get_plex_movies(self) -> Dict[str, Dict]: + """Query Plex API to get all movies.""" + print("\nConnecting to Plex...") + try: + plex = PlexServer(self.plex_url, 
self.plex_token) + movies_section = None + + # Find the Movies library section + for section in plex.library.sections(): + if section.type == 'movie': + movies_section = section + break + + if not movies_section: + print("Error: No Movies library found in Plex") + return {} + + print(f"Found Plex Movies library: {movies_section.title}") + movies_section.refresh() # Refresh to get latest data + + plex_movies = {} + for movie in movies_section.all(): + # Normalize title - use filename if available, otherwise title + key = movie.title + file_path = '' + + if hasattr(movie, 'media') and movie.media: + # Try to get the actual filename + for media in movie.media: + if hasattr(media, 'parts') and media.parts: + for part in media.parts: + if hasattr(part, 'file'): + file_path = part.file + filename = Path(part.file).stem + key = filename + break + if file_path: + break + + plex_movies[key] = { + 'title': movie.title, + 'year': getattr(movie, 'year', None), + 'file': file_path, + 'added_at': getattr(movie, 'addedAt', None), + } + + print(f"Found {len(plex_movies)} movies in Plex") + return plex_movies + + except Unauthorized: + print("Error: Plex authentication failed. 
Check your PLEX_TOKEN") + return {} + except Exception as e: + print(f"Error connecting to Plex: {e}") + return {} + + def get_jellyfin_movies(self) -> Dict[str, Dict]: + """Query Jellyfin API to get all movies.""" + print("\nConnecting to Jellyfin...") + + headers = { + 'X-Emby-Token': self.jellyfin_api_key, + 'Content-Type': 'application/json' + } + + # Get all items from the Movies library + url = f"{self.jellyfin_url}/Users/{self.jellyfin_user_id}/Items" + params = { + 'Recursive': 'true', + 'IncludeItemTypes': 'Movie', + 'Fields': 'Path,MediaSources,DateCreated', + 'Limit': 1000 # Adjust if you have more than 1000 movies + } + + try: + all_movies = {} + start_index = 0 + movies_without_path = 0 + + while True: + params['StartIndex'] = start_index + response = requests.get(url, headers=headers, params=params, timeout=30) + response.raise_for_status() + + data = response.json() + items = data.get('Items', []) + + if not items: + break + + for item in items: + item_id = item.get('Id', '') + title = item.get('Name', '') + + # Try to get path from multiple sources + path = item.get('Path', '') + + # If Path is empty or looks like a directory, try MediaSources + if not path or (path and not Path(path).suffix): + media_sources = item.get('MediaSources', []) + if media_sources: + # Get the first media source's path + for media_source in media_sources: + media_path = media_source.get('Path', '') + if media_path and Path(media_path).suffix: + path = media_path + break + + # If still no path, try to get it from the item details endpoint + if not path or (path and not Path(path).suffix): + try: + item_url = f"{self.jellyfin_url}/Users/{self.jellyfin_user_id}/Items/{item_id}" + item_response = requests.get(item_url, headers=headers, params={'Fields': 'MediaSources'}, timeout=10) + if item_response.status_code == 200: + item_data = item_response.json() + media_sources = item_data.get('MediaSources', []) + if media_sources: + for media_source in media_sources: + media_path = 
media_source.get('Path', '') + if media_path and Path(media_path).suffix: + path = media_path + break + except: + pass # If we can't get details, continue with what we have + + # Filter: Only include movies from the configured Movies directory + if path: + path_normalized = self.normalize_path(path) + # Skip if not in the movies path + if not path_normalized.startswith(self.movies_path_normalized): + continue + else: + # Skip movies without paths as we can't determine their location + continue + + # Determine key and store movie info + if path and Path(path).suffix: + filename = Path(path).stem + key = filename + else: + # Use title as fallback + key = title + movies_without_path += 1 + + all_movies[key] = { + 'title': title, + 'year': item.get('ProductionYear'), + 'path': path, + 'id': item_id, + 'date_created': item.get('DateCreated'), + } + + # Check if there are more items + total_records = data.get('TotalRecordCount', 0) + if start_index + len(items) >= total_records: + break + + start_index += len(items) + + print(f"Found {len(all_movies)} movies in Jellyfin") + if movies_without_path > 0: + print(f"Warning: {movies_without_path} movies without file paths (using title as key)") + return all_movies + + except RequestException as e: + print(f"Error connecting to Jellyfin: {e}") + if hasattr(e, 'response') and e.response is not None: + print(f"Response: {e.response.text}") + return {} + + def normalize_title(self, title: str) -> str: + """Normalize title for comparison (lowercase, remove special chars).""" + return title.lower().strip() + + def normalize_path(self, path: str) -> str: + """Normalize path for comparison.""" + if not path: + return "" + return str(Path(path)).lower().replace('\\', '/').strip() + + def build_jellyfin_lookup(self, jellyfin_movies: Dict) -> Dict[str, Dict]: + """Build a lookup index for Jellyfin movies by normalized paths and stems.""" + lookup = {} + + for jf_key, jf_data in jellyfin_movies.items(): + jf_path = jf_data.get('path', 
'') + if jf_path: + path_obj = Path(jf_path) + + # Index by normalized full path + path_norm = self.normalize_path(jf_path) + lookup[path_norm] = jf_data + + # If it's a file (has extension), index by stem and filename + if path_obj.suffix: + # Index by normalized filename stem + stem = path_obj.stem + stem_norm = self.normalize_title(stem) + lookup[stem_norm] = jf_data + + # Also index by filename (with extension) + filename = path_obj.name + filename_norm = self.normalize_title(filename) + lookup[filename_norm] = jf_data + else: + # It's a directory path, try to find files in it + # This handles cases where Jellyfin returns directory paths + try: + if path_obj.exists() and path_obj.is_dir(): + for video_file in path_obj.glob('*'): + if video_file.is_file() and video_file.suffix.lower() in {'.mkv', '.avi', '.mp4', '.m4v', '.mov', '.wmv', '.flv', '.webm'}: + stem = video_file.stem + stem_norm = self.normalize_title(stem) + lookup[stem_norm] = jf_data + + filename = video_file.name + filename_norm = self.normalize_title(filename) + lookup[filename_norm] = jf_data + + full_path_norm = self.normalize_path(str(video_file)) + lookup[full_path_norm] = jf_data + except: + pass # If we can't access the path, skip + + # Also index by the key itself (normalized) + if jf_key: + key_norm = self.normalize_title(jf_key) + lookup[key_norm] = jf_data + + # Also index by title (normalized) + title = jf_data.get('title', '') + if title: + title_norm = self.normalize_title(title) + lookup[title_norm] = jf_data + + return lookup + + def find_jellyfin_match(self, fs_path: str, fs_stem: str, jellyfin_lookup: Dict) -> Tuple[bool, str]: + """Find if a filesystem movie exists in Jellyfin using the lookup index.""" + fs_path_norm = self.normalize_path(fs_path) + fs_stem_norm = self.normalize_title(fs_stem) + fs_filename = Path(fs_path).name + fs_filename_norm = self.normalize_title(fs_filename) + + # Extract just the filename part from the path for matching + # This handles cases where 
paths might be in different formats + fs_path_parts = Path(fs_path).parts + fs_basename = fs_path_parts[-1] if fs_path_parts else fs_filename + fs_basename_norm = self.normalize_title(fs_basename) + fs_basename_stem_norm = self.normalize_title(Path(fs_basename).stem) + + # Try multiple matching strategies in order of specificity + match_keys = [ + fs_path_norm, # Full normalized path + fs_stem_norm, # Filename stem + fs_filename_norm, # Full filename with extension + fs_basename_norm, # Just the basename + fs_basename_stem_norm, # Basename without extension + ] + + for norm_key in match_keys: + if norm_key and norm_key in jellyfin_lookup: + jf_data = jellyfin_lookup[norm_key] + return True, jf_data.get('title', '') + + # Also try partial path matching (in case paths differ slightly) + # Check if any part of the filesystem path matches any Jellyfin path + if fs_path_norm: + for jf_norm_key, jf_data in jellyfin_lookup.items(): + # Check if paths overlap significantly + if (fs_path_norm in jf_norm_key or jf_norm_key in fs_path_norm or + fs_stem_norm in jf_norm_key or jf_norm_key in fs_stem_norm): + # Additional check: make sure it's not just a partial word match + if len(fs_stem_norm) > 5 and len(jf_norm_key) > 5: + return True, jf_data.get('title', '') + + return False, None + + def compare_libraries(self) -> Tuple[Dict, Dict, Dict]: + """Compare all three sources and return discrepancies.""" + fs_movies = self.get_filesystem_movies() + plex_movies = self.get_plex_movies() + jellyfin_movies = self.get_jellyfin_movies() + + # Build Jellyfin lookup index for efficient matching + jellyfin_lookup = self.build_jellyfin_lookup(jellyfin_movies) + + # Normalize keys for comparison + fs_normalized = {self.normalize_title(k): (k, v) for k, v in fs_movies.items()} + plex_normalized = {self.normalize_title(k): (k, v) for k, v in plex_movies.items()} + jellyfin_normalized = {self.normalize_title(k): (k, v) for k, v in jellyfin_movies.items()} + + # Find movies in filesystem but 
not in Plex + missing_from_plex = {} + debug_samples = [] # Store first few for debugging + + for norm_key, (orig_key, fs_data) in fs_normalized.items(): + if norm_key not in plex_normalized: + # Check if it's in Jellyfin using improved matching + fs_path = fs_data['full_path'] + fs_stem = fs_data['stem'] + in_jellyfin, jf_title = self.find_jellyfin_match(fs_path, fs_stem, jellyfin_lookup) + + # Store debug info for first few unmatched items + if not in_jellyfin and len(debug_samples) < 3: + debug_samples.append({ + 'fs_path': fs_path, + 'fs_stem': fs_stem, + 'fs_norm_path': self.normalize_path(fs_path), + 'fs_norm_stem': self.normalize_title(fs_stem), + 'jellyfin_keys_sample': list(jellyfin_lookup.keys())[:5] if jellyfin_lookup else [] + }) + + missing_from_plex[orig_key] = { + 'path': fs_path, + 'in_jellyfin': in_jellyfin, + 'jellyfin_title': jf_title + } + + # Print debug info if we have samples + if debug_samples: + print("\nšŸ” DEBUG: Sample of unmatched files (first 3):") + for i, sample in enumerate(debug_samples, 1): + print(f"\n Sample {i}:") + print(f" FS Path: {sample['fs_path']}") + print(f" FS Stem: {sample['fs_stem']}") + print(f" Normalized Path: {sample['fs_norm_path']}") + print(f" Normalized Stem: {sample['fs_norm_stem']}") + print(f" Sample Jellyfin keys: {sample['jellyfin_keys_sample']}") + + # Find movies in Jellyfin but not in Plex + missing_from_plex_jellyfin = {} + for orig_key, data in jellyfin_movies.items(): + jf_stem = Path(data['path']).stem if data.get('path') else orig_key + jf_norm = self.normalize_title(jf_stem) + + if jf_norm not in plex_normalized: + # Check if it's in filesystem using improved matching + jf_path = data.get('path', '') + in_filesystem = False + fs_path = None + + if jf_path: + jf_path_norm = self.normalize_path(jf_path) + jf_stem_norm = self.normalize_title(jf_stem) + jf_filename = Path(jf_path).name + jf_filename_norm = self.normalize_title(jf_filename) + + # Try to find matching filesystem movie + for 
fs_orig_key, fs_data in fs_movies.items(): + fs_path_str = fs_data['full_path'] + fs_path_norm = self.normalize_path(fs_path_str) + fs_stem_norm = self.normalize_title(fs_data['stem']) + fs_filename_norm = self.normalize_title(fs_data['filename']) + + # Match by path, stem, or filename + if (jf_path_norm == fs_path_norm or + jf_stem_norm == fs_stem_norm or + jf_filename_norm == fs_filename_norm or + jf_path_norm in fs_path_norm or + fs_path_norm in jf_path_norm): + in_filesystem = True + fs_path = fs_path_str + break + + missing_from_plex_jellyfin[orig_key] = { + 'title': data['title'], + 'path': jf_path, + 'in_filesystem': in_filesystem, + 'filesystem_path': fs_path + } + + # Find movies in Plex but not in filesystem (orphaned) + orphaned_in_plex = {} + for norm_key, (orig_key, data) in plex_normalized.items(): + if norm_key not in fs_normalized: + orphaned_in_plex[orig_key] = data + + return missing_from_plex, missing_from_plex_jellyfin, orphaned_in_plex + + def generate_report(self): + """Generate and print a comprehensive comparison report.""" + print("\n" + "="*80) + print("LIBRARY COMPARISON REPORT") + print("="*80) + + missing_from_plex, missing_from_plex_jellyfin, orphaned_in_plex = self.compare_libraries() + + print(f"\nšŸ“Š SUMMARY:") + print(f" Movies missing from Plex (found in filesystem): {len(missing_from_plex)}") + print(f" Movies missing from Plex (found in Jellyfin): {len(missing_from_plex_jellyfin)}") + print(f" Movies in Plex but not in filesystem: {len(orphaned_in_plex)}") + + if missing_from_plex: + print(f"\nāŒ MOVIES IN FILESYSTEM BUT MISSING FROM PLEX ({len(missing_from_plex)}):") + print("-" * 80) + for i, (title, info) in enumerate(sorted(missing_from_plex.items()), 1): + print(f"\n{i}. 
{title}") + print(f" Path: {info['path']}") + print(f" In Jellyfin: {'āœ“ Yes' if info['in_jellyfin'] else 'āœ— No'}") + if info['jellyfin_title']: + print(f" Jellyfin Title: {info['jellyfin_title']}") + + if missing_from_plex_jellyfin and len(missing_from_plex_jellyfin) != len(missing_from_plex): + print(f"\nāŒ MOVIES IN JELLYFIN BUT MISSING FROM PLEX ({len(missing_from_plex_jellyfin)}):") + print("-" * 80) + for i, (key, info) in enumerate(sorted(missing_from_plex_jellyfin.items()), 1): + print(f"\n{i}. {info['title']}") + print(f" Path: {info['path']}") + print(f" In Filesystem: {'āœ“ Yes' if info['in_filesystem'] else 'āœ— No'}") + if info['filesystem_path']: + print(f" Filesystem Path: {info['filesystem_path']}") + + if orphaned_in_plex: + print(f"\nāš ļø MOVIES IN PLEX BUT NOT IN FILESYSTEM ({len(orphaned_in_plex)}):") + print("-" * 80) + for i, (title, info) in enumerate(sorted(orphaned_in_plex.items()), 1): + print(f"\n{i}. {title}") + if info.get('file'): + print(f" File: {info['file']}") + + # Save detailed report to JSON + report_data = { + 'missing_from_plex': missing_from_plex, + 'missing_from_plex_jellyfin': missing_from_plex_jellyfin, + 'orphaned_in_plex': orphaned_in_plex + } + + report_file = Path('movies_comparison_report.json') + with open(report_file, 'w') as f: + json.dump(report_data, f, indent=2, default=str) + + print(f"\nšŸ’¾ Detailed report saved to: {report_file}") + print("="*80) + + +def main(): + load_dotenv() + + # Get configuration from environment variables + plex_url = os.getenv('PLEX_URL', 'http://localhost:32400') + plex_token = os.getenv('PLEX_TOKEN') + jellyfin_url = os.getenv('JELLYFIN_URL', 'http://localhost:8096') + jellyfin_api_key = os.getenv('JELLYFIN_API_KEY') + jellyfin_user_id = os.getenv('JELLYFIN_USER_ID') + movies_path = os.getenv('MOVIES_PATH') + + # Validate required configuration + if not plex_token: + print("Error: PLEX_TOKEN not set in environment or .env file") + sys.exit(1) + + if not jellyfin_api_key: + 
print("Error: JELLYFIN_API_KEY not set in environment or .env file") + sys.exit(1) + + if not jellyfin_user_id: + print("Error: JELLYFIN_USER_ID not set in environment or .env file") + sys.exit(1) + + if not movies_path: + print("Error: MOVIES_PATH not set in environment or .env file") + sys.exit(1) + + comparator = MovieLibraryComparator( + plex_url=plex_url, + plex_token=plex_token, + jellyfin_url=jellyfin_url, + jellyfin_api_key=jellyfin_api_key, + jellyfin_user_id=jellyfin_user_id, + movies_path=movies_path + ) + + comparator.generate_report() + + +if __name__ == '__main__': + main() + diff --git a/compare_series.py b/compare_series.py new file mode 100755 index 0000000..338adf3 --- /dev/null +++ b/compare_series.py @@ -0,0 +1,539 @@ +#!/usr/bin/env python3 +""" +Compare Plex and Jellyfin TV series libraries to find discrepancies. +Identifies series that exist in filesystem/Jellyfin but are missing from Plex. +""" + +import os +import sys +from pathlib import Path +from typing import Set, Dict, List, Tuple +from collections import defaultdict +import json + +try: + from plexapi.server import PlexServer + from plexapi.exceptions import NotFound, Unauthorized +except ImportError: + print("Error: plexapi not installed. 
Run: pip install -r requirements.txt") + sys.exit(1) + +import requests +from requests.exceptions import RequestException +from dotenv import load_dotenv + + +class SeriesLibraryComparator: + def __init__(self, plex_url: str, plex_token: str, + jellyfin_url: str, jellyfin_api_key: str, jellyfin_user_id: str, + series_paths: List[str]): + self.plex_url = plex_url + self.plex_token = plex_token + self.jellyfin_url = jellyfin_url.rstrip('/') + self.jellyfin_api_key = jellyfin_api_key + self.jellyfin_user_id = jellyfin_user_id + # Support multiple series paths + if isinstance(series_paths, str): + series_paths = [series_paths] + self.series_paths = [Path(p) for p in series_paths] + self.series_paths_normalized = [self.normalize_path(str(p)) for p in self.series_paths] + + def normalize_path(self, path: str) -> str: + """Normalize path for comparison.""" + if not path: + return "" + return str(Path(path)).lower().replace('\\', '/').strip() + + def normalize_title(self, title: str) -> str: + """Normalize title for comparison (lowercase, normalize punctuation).""" + import re + normalized = title.lower().strip() + + # Remove year in parentheses + normalized = re.sub(r'\s*\(\d{4}\)\s*', ' ', normalized) + + # Remove all punctuation and special characters (keep alphanumeric and spaces only) + normalized = re.sub(r'[^a-z0-9\s]', ' ', normalized) + + # Normalize multiple spaces + normalized = re.sub(r'\s+', ' ', normalized) + + # Remove common articles + normalized = re.sub(r'\b(the|a|an)\b', ' ', normalized) + + # Normalize multiple spaces again after removals + normalized = re.sub(r'\s+', ' ', normalized) + + return normalized.strip() + + def extract_base_title(self, dirname: str) -> str: + """Extract base title from directory name, removing year, imdb tags, etc.""" + import re + # Remove patterns like (2021), [imdbid-tt123456], [tvdbid-123456], {imdb-tt123456} + # Note: Use * instead of + to match zero or more characters (handles empty IDs like [imdbid-]) + cleaned = 
re.sub(r'\s*\(\d{4}\)\s*', ' ', dirname) # Remove (year) + cleaned = re.sub(r'\s*\[imdbid-[^\]]*\]\s*', ' ', cleaned) # Remove [imdbid-...] or [imdbid-] + cleaned = re.sub(r'\s*\[tvdbid-[^\]]*\]\s*', ' ', cleaned) # Remove [tvdbid-...] or [tvdbid-] + cleaned = re.sub(r'\s*\{imdb-[^\}]*\}\s*', ' ', cleaned) # Remove {imdb-...} or {imdb-} + cleaned = re.sub(r'\s*\{tvdb-[^\}]*\}\s*', ' ', cleaned) # Remove {tvdb-...} or {tvdb-} + # Remove trailing dash/hyphen with spaces + cleaned = re.sub(r'\s*-\s*$', '', cleaned) + cleaned = re.sub(r'\s+', ' ', cleaned) # Normalize whitespace + return cleaned.strip() + + def get_filesystem_series(self) -> Dict[str, Dict]: + """Scan filesystem for TV series directories.""" + series = {} + + for series_path in self.series_paths: + print(f"Scanning filesystem at: {series_path}") + + if not series_path.exists(): + print(f"Warning: Series path does not exist: {series_path}") + continue + + # Each subdirectory of the series path is assumed to be a TV show + for series_dir in series_path.iterdir(): + if series_dir.is_dir() and not series_dir.name.startswith('.'): + # Count episodes + episode_count = 0 + video_extensions = {'.mkv', '.avi', '.mp4', '.m4v', '.mov', '.wmv', '.flv', '.webm'} + + for video_file in series_dir.rglob('*'): + if video_file.is_file() and video_file.suffix.lower() in video_extensions: + episode_count += 1 + + # Use full path as key to handle duplicates across paths + key = f"{series_dir.name}||{series_path.name}" + + series[key] = { + 'path': series_dir, + 'full_path': str(series_dir), + 'name': series_dir.name, + 'episode_count': episode_count, + 'base_path': str(series_path) + } + + print(f"Found {len(series)} TV series directories in filesystem") + return series + + def get_plex_series(self) -> Dict[str, Dict]: + """Query Plex API to get all TV series.""" + print("\nConnecting to Plex...") + try: + plex = PlexServer(self.plex_url, self.plex_token) + series_section = None + + # Find the TV Shows library section + 
for section in plex.library.sections(): + if section.type == 'show': + series_section = section + break + + if not series_section: + print("Error: No TV Shows library found in Plex") + return {} + + print(f"Found Plex TV Shows library: {series_section.title}") + + plex_series = {} + for show in series_section.all(): + episode_count = 0 + try: + episode_count = show.leafCount # Total episodes + except: + pass + + # Try to get the directory path + show_path = '' + try: + if hasattr(show, 'locations') and show.locations: + show_path = show.locations[0] + except: + pass + + plex_series[show.title] = { + 'title': show.title, + 'year': getattr(show, 'year', None), + 'episode_count': episode_count, + 'path': show_path, + 'added_at': getattr(show, 'addedAt', None), + } + + print(f"Found {len(plex_series)} TV series in Plex") + return plex_series + + except Unauthorized: + print("Error: Plex authentication failed. Check your PLEX_TOKEN") + return {} + except Exception as e: + print(f"Error connecting to Plex: {e}") + return {} + + def get_jellyfin_series(self) -> Dict[str, Dict]: + """Query Jellyfin API to get all TV series.""" + print("\nConnecting to Jellyfin...") + + headers = { + 'X-Emby-Token': self.jellyfin_api_key, + 'Content-Type': 'application/json' + } + + # Get all TV series from the library + url = f"{self.jellyfin_url}/Users/{self.jellyfin_user_id}/Items" + params = { + 'Recursive': 'false', + 'IncludeItemTypes': 'Series', + 'Fields': 'Path,DateCreated', + 'Limit': 1000 + } + + try: + all_series = {} + start_index = 0 + + while True: + params['StartIndex'] = start_index + response = requests.get(url, headers=headers, params=params, timeout=30) + response.raise_for_status() + + data = response.json() + items = data.get('Items', []) + + if not items: + break + + for item in items: + series_id = item.get('Id', '') + title = item.get('Name', '') + path = item.get('Path', '') + + # Filter: Only include series from the configured Series directories + if path: + 
path_normalized = self.normalize_path(path) + # Check if path starts with any of the configured series paths + if not any(path_normalized.startswith(sp) for sp in self.series_paths_normalized): + continue + else: + continue + + # Get episode count + episode_count = 0 + try: + # Query for episode count + episodes_url = f"{self.jellyfin_url}/Users/{self.jellyfin_user_id}/Items" + episodes_params = { + 'ParentId': series_id, + 'Recursive': 'true', + 'IncludeItemTypes': 'Episode', + 'Limit': 1 + } + ep_response = requests.get(episodes_url, headers=headers, params=episodes_params, timeout=10) + if ep_response.status_code == 200: + ep_data = ep_response.json() + episode_count = ep_data.get('TotalRecordCount', 0) + except: + pass + + all_series[title] = { + 'title': title, + 'year': item.get('ProductionYear'), + 'path': path, + 'id': series_id, + 'episode_count': episode_count, + 'date_created': item.get('DateCreated'), + } + + # Check if there are more items + total_records = data.get('TotalRecordCount', 0) + if start_index + len(items) >= total_records: + break + + start_index += len(items) + + print(f"Found {len(all_series)} TV series in Jellyfin") + return all_series + + except RequestException as e: + print(f"Error connecting to Jellyfin: {e}") + if hasattr(e, 'response') and e.response is not None: + print(f"Response: {e.response.text}") + return {} + + def build_jellyfin_lookup(self, jellyfin_series: Dict) -> Dict[str, Dict]: + """Build a lookup index for Jellyfin series by normalized names and paths.""" + lookup = {} + + for jf_key, jf_data in jellyfin_series.items(): + # Index by normalized title + title_norm = self.normalize_title(jf_data['title']) + lookup[title_norm] = jf_data + + # Index by normalized path directory name (with and without year/tags) + jf_path = jf_data.get('path', '') + if jf_path: + path_obj = Path(jf_path) + dirname = path_obj.name + dirname_norm = self.normalize_title(dirname) + lookup[dirname_norm] = jf_data + + # Also index by base title 
(without year/tags) + base_title = self.extract_base_title(dirname) + base_title_norm = self.normalize_title(base_title) + lookup[base_title_norm] = jf_data + + return lookup + + def find_jellyfin_match(self, fs_name: str, fs_path: str, jellyfin_lookup: Dict) -> Tuple[bool, str, int]: + """Find if a filesystem series exists in Jellyfin.""" + fs_name_norm = self.normalize_title(fs_name) + fs_path_norm = self.normalize_path(fs_path) + + # Extract base title from filesystem name + fs_base_title = self.extract_base_title(fs_name) + fs_base_norm = self.normalize_title(fs_base_title) + + # Try matching by normalized name (full and base) + for norm_key in [fs_name_norm, fs_base_norm]: + if norm_key in jellyfin_lookup: + jf_data = jellyfin_lookup[norm_key] + return True, jf_data.get('title', ''), jf_data.get('episode_count', 0) + + # Try matching by path + for jf_norm_key, jf_data in jellyfin_lookup.items(): + jf_path = jf_data.get('path', '') + if jf_path: + jf_path_norm = self.normalize_path(jf_path) + if fs_path_norm == jf_path_norm or fs_path_norm in jf_path_norm: + return True, jf_data.get('title', ''), jf_data.get('episode_count', 0) + + return False, None, 0 + + def compare_libraries(self) -> Tuple[Dict, Dict, Dict]: + """Compare all three sources and return discrepancies.""" + fs_series = self.get_filesystem_series() + plex_series = self.get_plex_series() + jellyfin_series = self.get_jellyfin_series() + + # Build Jellyfin lookup index + jellyfin_lookup = self.build_jellyfin_lookup(jellyfin_series) + + # Normalize keys for comparison - extract base title from filesystem + fs_normalized = {} + for k, v in fs_series.items(): + # Extract base title from directory name (remove year, imdb tags) + base_title = self.extract_base_title(v['name']) + normalized = self.normalize_title(base_title) + fs_normalized[normalized] = (k, v) + + plex_normalized = {self.normalize_title(k): (k, v) for k, v in plex_series.items()} + + # Find series in filesystem but not in Plex + 
missing_from_plex = {} + for norm_key, (orig_key, fs_data) in fs_normalized.items(): + if norm_key not in plex_normalized: + # Double-check by path - maybe the title doesn't match but path does + fs_path = fs_data['full_path'] + fs_path_norm = self.normalize_path(fs_path) + found_in_plex = False + + # Check if any Plex series has a matching path + for plex_title, plex_data in plex_series.items(): + plex_path = plex_data.get('path', '') + if plex_path: + plex_path_norm = self.normalize_path(plex_path) + if fs_path_norm == plex_path_norm or fs_path_norm in plex_path_norm or plex_path_norm in fs_path_norm: + found_in_plex = True + break + + if not found_in_plex: + fs_name = fs_data['name'] + in_jellyfin, jf_title, jf_ep_count = self.find_jellyfin_match(fs_name, fs_path, jellyfin_lookup) + + missing_from_plex[orig_key] = { + 'path': fs_path, + 'episode_count': fs_data['episode_count'], + 'in_jellyfin': in_jellyfin, + 'jellyfin_title': jf_title, + 'jellyfin_episode_count': jf_ep_count + } + + # Find series in Jellyfin but not in Plex + missing_from_plex_jellyfin = {} + for orig_key, data in jellyfin_series.items(): + jf_title_norm = self.normalize_title(data['title']) + + if jf_title_norm not in plex_normalized: + jf_path = data.get('path', '') + in_filesystem = False + fs_path = None + fs_ep_count = 0 + + if jf_path: + jf_path_norm = self.normalize_path(jf_path) + dirname = Path(jf_path).name + dirname_norm = self.normalize_title(dirname) + jf_title_norm_alt = self.normalize_title(data['title']) + + for fs_orig_key, fs_data in fs_series.items(): + fs_path_str = fs_data['full_path'] + fs_path_norm = self.normalize_path(fs_path_str) + fs_name_norm = self.normalize_title(fs_data['name']) + + # Match by path, directory name, or title + if (jf_path_norm == fs_path_norm or + dirname_norm == fs_name_norm or + jf_title_norm_alt == fs_name_norm): + in_filesystem = True + fs_path = fs_path_str + fs_ep_count = fs_data['episode_count'] + break + + 
missing_from_plex_jellyfin[orig_key] = { + 'title': data['title'], + 'path': jf_path, + 'episode_count': data['episode_count'], + 'in_filesystem': in_filesystem, + 'filesystem_path': fs_path, + 'filesystem_episode_count': fs_ep_count + } + + # Find series in Plex but not in filesystem (orphaned) + orphaned_in_plex = {} + for norm_key, (orig_key, data) in plex_normalized.items(): + if norm_key not in fs_normalized: + # Double-check by looking at the actual path if available + plex_path = data.get('path', '') + found_in_fs = False + + if plex_path: + plex_path_norm = self.normalize_path(plex_path) + # Check if this path matches any filesystem series + for fs_key, fs_data in fs_series.items(): + fs_path_norm = self.normalize_path(fs_data['full_path']) + if plex_path_norm == fs_path_norm or plex_path_norm in fs_path_norm or fs_path_norm in plex_path_norm: + found_in_fs = True + break + + if not found_in_fs: + orphaned_in_plex[orig_key] = data + + return missing_from_plex, missing_from_plex_jellyfin, orphaned_in_plex + + def generate_report(self): + """Generate and print a comprehensive comparison report.""" + print("\n" + "="*80) + print("TV SERIES COMPARISON REPORT") + print("="*80) + + missing_from_plex, missing_from_plex_jellyfin, orphaned_in_plex = self.compare_libraries() + + print(f"\nšŸ“Š SUMMARY:") + print(f" Series missing from Plex (found in filesystem): {len(missing_from_plex)}") + print(f" Series missing from Plex (found in Jellyfin): {len(missing_from_plex_jellyfin)}") + print(f" Series in Plex but not in filesystem: {len(orphaned_in_plex)}") + + if missing_from_plex: + print(f"\nāŒ TV SERIES IN FILESYSTEM BUT MISSING FROM PLEX ({len(missing_from_plex)}):") + print("-" * 80) + for i, (title, info) in enumerate(sorted(missing_from_plex.items()), 1): + print(f"\n{i}. 
{title}") + print(f" Path: {info['path']}") + print(f" Episodes: {info['episode_count']}") + print(f" In Jellyfin: {'āœ“ Yes' if info['in_jellyfin'] else 'āœ— No'}") + if info['jellyfin_title']: + print(f" Jellyfin Title: {info['jellyfin_title']}") + print(f" Jellyfin Episodes: {info['jellyfin_episode_count']}") + + if missing_from_plex_jellyfin and len(missing_from_plex_jellyfin) != len(missing_from_plex): + print(f"\nāŒ TV SERIES IN JELLYFIN BUT MISSING FROM PLEX ({len(missing_from_plex_jellyfin)}):") + print("-" * 80) + for i, (key, info) in enumerate(sorted(missing_from_plex_jellyfin.items()), 1): + print(f"\n{i}. {info['title']}") + print(f" Path: {info['path']}") + print(f" Episodes: {info['episode_count']}") + print(f" In Filesystem: {'āœ“ Yes' if info['in_filesystem'] else 'āœ— No'}") + if info['filesystem_path']: + print(f" Filesystem Path: {info['filesystem_path']}") + print(f" Filesystem Episodes: {info['filesystem_episode_count']}") + + if orphaned_in_plex: + print(f"\nāš ļø TV SERIES IN PLEX BUT NOT IN FILESYSTEM ({len(orphaned_in_plex)}):") + print("-" * 80) + for i, (title, info) in enumerate(sorted(orphaned_in_plex.items()), 1): + print(f"\n{i}. 
{title}") + if info.get('path'): + print(f" Path: {info['path']}") + print(f" Episodes: {info.get('episode_count', 0)}") + + # Save detailed report to JSON + report_data = { + 'missing_from_plex': missing_from_plex, + 'missing_from_plex_jellyfin': missing_from_plex_jellyfin, + 'orphaned_in_plex': orphaned_in_plex + } + + report_file = Path('series_comparison_report.json') + with open(report_file, 'w') as f: + json.dump(report_data, f, indent=2, default=str) + + print(f"\nšŸ’¾ Detailed report saved to: {report_file}") + print("="*80) + + +def main(): + load_dotenv() + + # Get configuration from environment variables + plex_url = os.getenv('PLEX_URL', 'http://localhost:32400') + plex_token = os.getenv('PLEX_TOKEN') + jellyfin_url = os.getenv('JELLYFIN_URL', 'http://localhost:8096') + jellyfin_api_key = os.getenv('JELLYFIN_API_KEY') + jellyfin_user_id = os.getenv('JELLYFIN_USER_ID') + + # Support multiple series paths + series_paths = [] + series_path_primary = os.getenv('SERIES_PATH', os.getenv('TV_PATH')) + if series_path_primary: + series_paths.append(series_path_primary) + + # Check for additional paths (SERIES_PATH_2, SERIES_PATH_3, etc.) 
+ for i in range(2, 10): + extra_path = os.getenv(f'SERIES_PATH_{i}') + if extra_path: + series_paths.append(extra_path) + + if not series_paths: + print("Error: No SERIES_PATH or TV_PATH set in environment or .env file") + sys.exit(1) + + # Validate required configuration + if not plex_token: + print("Error: PLEX_TOKEN not set in environment or .env file") + sys.exit(1) + + if not jellyfin_api_key: + print("Error: JELLYFIN_API_KEY not set in environment or .env file") + sys.exit(1) + + if not jellyfin_user_id: + print("Error: JELLYFIN_USER_ID not set in environment or .env file") + sys.exit(1) + + print(f"Configured series paths: {series_paths}") + + comparator = SeriesLibraryComparator( + plex_url=plex_url, + plex_token=plex_token, + jellyfin_url=jellyfin_url, + jellyfin_api_key=jellyfin_api_key, + jellyfin_user_id=jellyfin_user_id, + series_paths=series_paths + ) + + comparator.generate_report() + + +if __name__ == '__main__': + main() + diff --git a/config.example.txt b/config.example.txt new file mode 100644 index 0000000..6b5229f --- /dev/null +++ b/config.example.txt @@ -0,0 +1,22 @@ +# Jellyfin-Plex Library Checker Configuration +# Copy this to .env and fill in your actual values + +# Plex Configuration +PLEX_URL=http://localhost:32400 +PLEX_TOKEN=your_plex_token_here + +# Jellyfin Configuration +JELLYFIN_URL=http://localhost:8096 +JELLYFIN_API_KEY=your_jellyfin_api_key_here +# NOTE: JELLYFIN_USER_ID must be a UUID, not a username! 
+# Get it from: Dashboard → Users → Click your user → Copy ID from URL +JELLYFIN_USER_ID=a1b2c3d4-5678-90ab-cdef-1234567890ab + +# Filesystem Configuration +MOVIES_PATH=/srv/pool/multimedia/media/Movies +SERIES_PATH=/srv/pool/multimedia/media/Series + +# Optional: Additional series paths (supports up to SERIES_PATH_9) +# SERIES_PATH_2=/srv/backups/Series +# SERIES_PATH_3=/mnt/external/TV + diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..ee46b40 --- /dev/null +++ b/flake.lock @@ -0,0 +1,61 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1731533236, + "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1764667669, + "narHash": "sha256-7WUCZfmqLAssbDqwg9cUDAXrSoXN79eEEq17qhTNM/Y=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "418468ac9527e799809c900eda37cbff999199b6", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..3b44850 --- /dev/null +++ b/flake.nix @@ -0,0 +1,97 @@ +{ + description = "Jellyfin-Plex Library Comparator"; + + inputs = { + nixpkgs.url = 
"github:NixOS/nixpkgs/nixos-unstable"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = import nixpkgs { + inherit system; + }; + + # Python environment with pip and build tools + pythonEnv = pkgs.python3.withPackages (ps: with ps; [ + pip + setuptools + wheel + virtualenv + ]); + + in + { + apps = { + compare-movies = { + type = "app"; + program = "${pkgs.writeShellScript "compare-movies" '' + cd ${./.} + ${pythonEnv}/bin/python compare_movies.py + ''}"; + }; + + analyze-movies = { + type = "app"; + program = "${pkgs.writeShellScript "analyze-movies" '' + cd ${./.} + ${pythonEnv}/bin/python analyze_movies.py + ''}"; + }; + + compare-series = { + type = "app"; + program = "${pkgs.writeShellScript "compare-series" '' + cd ${./.} + ${pythonEnv}/bin/python compare_series.py + ''}"; + }; + + analyze-series = { + type = "app"; + program = "${pkgs.writeShellScript "analyze-series" '' + cd ${./.} + ${pythonEnv}/bin/python analyze_series.py + ''}"; + }; + }; + + devShells.default = pkgs.mkShell { + buildInputs = [ + pythonEnv + ]; + + shellHook = '' + echo "🐍 Setting up Python environment..." + + # Create virtual environment if it doesn't exist + if [ ! -d "venv" ]; then + echo "Creating virtual environment..." + ${pythonEnv}/bin/python -m venv venv + fi + + # Activate virtual environment + source venv/bin/activate + + # Install/upgrade dependencies + echo "Installing Python dependencies from requirements.txt..." + pip install --upgrade pip setuptools wheel --quiet + pip install -r requirements.txt + + echo "" + echo "✅ Python environment ready!"
+ echo "" + echo "📋 Available commands:" + echo " Movies: python compare_movies.py | nix run .#compare-movies" + echo " python analyze_movies.py | nix run .#analyze-movies" + echo "" + echo " Series: python compare_series.py | nix run .#compare-series" + echo " python analyze_series.py | nix run .#analyze-series" + echo "" + ''; + }; + } + ); +} +