Files
plexfin-compare/compare_series.py
Danilo Reyes e772af13a7 Add initial project files for Jellyfin-Plex Library Checker
- Create .editorconfig for consistent coding styles.
- Add .envrc for direnv integration.
- Include .gitignore to exclude environment and build files.
- Implement compare_movies.py and analyze_movies.py for movie library comparison and analysis.
- Implement compare_series.py and analyze_series.py for TV series library comparison and analysis.
- Add configuration example in config.example.txt.
- Create README.md with project overview, setup instructions, and usage examples.
- Add LICENSE file for MIT License.
- Include flake.nix and flake.lock for Nix-based development environment.
- Add USAGE.md for quick start guide and common commands.
2025-12-05 01:57:15 -06:00

540 lines
23 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Compare Plex and Jellyfin TV series libraries to find discrepancies.
Identifies series that exist in filesystem/Jellyfin but are missing from Plex.
"""
import json
import os
import re
import sys
import unicodedata
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple

try:
    from plexapi.server import PlexServer
    from plexapi.exceptions import NotFound, Unauthorized
except ImportError:
    print("Error: plexapi not installed. Run: pip install -r requirements.txt")
    sys.exit(1)

import requests
from dotenv import load_dotenv
from requests.exceptions import RequestException
class SeriesLibraryComparator:
    """Cross-check TV series between a filesystem tree, Plex and Jellyfin.

    The comparator scans one or more series directories on disk, asks Plex
    and Jellyfin for the series they know about, and reports:

    * series on disk that Plex does not list,
    * series in Jellyfin that Plex does not list,
    * series Plex lists that cannot be found on disk.

    Matching is heuristic: titles are compared in a normalized form (see
    ``normalize_title``) and paths are used as a fallback.
    """

    # File suffixes (lowercase) counted as episodes during the filesystem scan.
    VIDEO_EXTENSIONS = frozenset(
        {'.mkv', '.avi', '.mp4', '.m4v', '.mov', '.wmv', '.flv', '.webm'}
    )

    # Patterns are compiled once here instead of on every call.
    _YEAR_RE = re.compile(r'\s*\(\d{4}\)\s*')
    # Anything that is not a (Unicode) letter, digit or whitespace.
    _NON_WORD_RE = re.compile(r'[^\w\s]|_')
    _ARTICLE_RE = re.compile(r'\b(?:the|a|an)\b')
    _SPACES_RE = re.compile(r'\s+')
    # [imdbid-…] / [tvdbid-…] / [tmdbid-…] and {imdb-…} / {tvdb-…} / {tmdb-…};
    # `*` keeps empty ids such as "[imdbid-]" matching as well.
    _ID_TAG_RE = re.compile(
        r'\s*(?:\[(?:imdb|tvdb|tmdb)id-[^\]]*\]|\{(?:imdb|tvdb|tmdb)-[^}]*\})\s*'
    )
    _TRAILING_DASH_RE = re.compile(r'\s*-\s*$')

    def __init__(self, plex_url: str, plex_token: str,
                 jellyfin_url: str, jellyfin_api_key: str, jellyfin_user_id: str,
                 series_paths: List[str]):
        """Store connection settings and the series directories to scan.

        Args:
            plex_url: Base URL of the Plex server.
            plex_token: Plex authentication token.
            jellyfin_url: Base URL of the Jellyfin server; trailing slashes
                are removed.
            jellyfin_api_key: Jellyfin API key, sent as ``X-Emby-Token``.
            jellyfin_user_id: Jellyfin user whose items are listed.
            series_paths: Directories whose immediate sub-directories are
                treated as series. A single string is accepted too.
        """
        self.plex_url = plex_url
        self.plex_token = plex_token
        self.jellyfin_url = jellyfin_url.rstrip('/')
        self.jellyfin_api_key = jellyfin_api_key
        self.jellyfin_user_id = jellyfin_user_id
        # Support multiple series paths
        if isinstance(series_paths, str):
            series_paths = [series_paths]
        self.series_paths = [Path(p) for p in series_paths]
        self.series_paths_normalized = [
            self.normalize_path(str(p)) for p in self.series_paths
        ]

    # ------------------------------------------------------------------
    # Normalization helpers
    # ------------------------------------------------------------------

    def normalize_path(self, path: str) -> str:
        """Return *path* lowercased with forward slashes, or ``""`` if empty."""
        if not path:
            return ""
        return str(Path(path)).lower().replace('\\', '/').strip()

    def normalize_title(self, title: str) -> str:
        """Return a comparison key for *title*.

        The key is lowercase, has accents folded ("Pokémon" -> "pokemon"),
        drops a "(YYYY)" year, replaces punctuation with spaces and removes
        the articles "the", "a" and "an". Letters of any script are kept, so
        non-Latin titles do not all collapse to the same empty key. If
        removing the articles would leave nothing (a title such as "The"),
        the articles are kept.
        """
        if not title:
            return ""
        # NFKD splits accented letters into base letter + combining mark so
        # the marks can be dropped.
        decomposed = unicodedata.normalize('NFKD', title)
        folded = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))
        normalized = folded.lower().strip()
        normalized = self._YEAR_RE.sub(' ', normalized)
        normalized = self._NON_WORD_RE.sub(' ', normalized)
        normalized = self._SPACES_RE.sub(' ', normalized).strip()
        without_articles = self._ARTICLE_RE.sub(' ', normalized)
        without_articles = self._SPACES_RE.sub(' ', without_articles).strip()
        return without_articles or normalized

    def extract_base_title(self, dirname: str) -> str:
        """Strip "(YYYY)", provider-id tags and a trailing dash from *dirname*.

        Recognized tags are ``[imdbid-…]``, ``[tvdbid-…]``, ``[tmdbid-…]``,
        ``{imdb-…}``, ``{tvdb-…}`` and ``{tmdb-…}``; the id part may be empty.
        """
        cleaned = self._YEAR_RE.sub(' ', dirname)
        cleaned = self._ID_TAG_RE.sub(' ', cleaned)
        cleaned = self._TRAILING_DASH_RE.sub('', cleaned)
        return self._SPACES_RE.sub(' ', cleaned).strip()

    @staticmethod
    def _path_contains(container: str, part: str) -> bool:
        """Return True if *part* occurs in *container* on whole path components.

        Both arguments are expected to be normalized paths. Aligning on
        components still lets "/tv/show" match "/data/tv/show" (different
        mount prefix) but stops "/tv/the office" from matching
        "/tv/the office (us)". An empty path never matches.
        """
        container = container.strip('/')
        part = part.strip('/')
        if not container or not part:
            return False
        return f"/{part}/" in f"/{container}/"

    def _paths_overlap(self, first: str, second: str) -> bool:
        """Return True if either normalized path contains the other."""
        return self._path_contains(first, second) or self._path_contains(second, first)

    def _is_under_series_path(self, path_norm: str) -> bool:
        """Return True if *path_norm* is one of, or lies below, the series paths."""
        for base in self.series_paths_normalized:
            # Require a separator after the prefix so "/media/series2/x" is
            # not accepted for the base "/media/series".
            if path_norm == base or path_norm.startswith(base.rstrip('/') + '/'):
                return True
        return False

    @staticmethod
    def _unique_key(existing: Dict, preferred: str, qualifier=None) -> str:
        """Return *preferred*, or a qualified variant if it is already a key.

        Used so two series with the same title do not overwrite each other.
        """
        if preferred not in existing:
            return preferred
        candidate = f"{preferred} ({qualifier})" if qualifier else preferred
        counter = 1
        while candidate in existing:
            counter += 1
            candidate = f"{preferred} #{counter}"
        return candidate

    # ------------------------------------------------------------------
    # Data sources
    # ------------------------------------------------------------------

    def _count_video_files(self, series_dir: Path) -> int:
        """Count files below *series_dir* whose suffix is a known video type."""
        count = 0
        try:
            for entry in series_dir.rglob('*'):
                if entry.suffix.lower() in self.VIDEO_EXTENSIONS and entry.is_file():
                    count += 1
        except OSError as exc:
            # Keep the partial count rather than aborting the whole scan.
            print(f"Warning: Could not fully scan {series_dir}: {exc}")
        return count

    def get_filesystem_series(self) -> Dict[str, Dict]:
        """Scan the configured series paths for series directories.

        Every non-hidden immediate sub-directory of a series path is treated
        as one series.

        Returns:
            Mapping of ``"<dir name>||<base dir name>"`` to a dict with the
            keys ``path`` (``Path``), ``full_path``, ``name``,
            ``episode_count`` and ``base_path``. If two base directories
            share a name and contain a same-named series, the later entry is
            keyed by ``"<dir name>||<full base path>"`` instead.
        """
        series = {}
        for series_path in self.series_paths:
            print(f"Scanning filesystem at: {series_path}")
            if not series_path.is_dir():
                print(f"Warning: Series path does not exist or is not a directory: {series_path}")
                continue
            try:
                candidates = sorted(series_path.iterdir())
            except OSError as exc:
                print(f"Warning: Could not read series path {series_path}: {exc}")
                continue
            for series_dir in candidates:
                if series_dir.name.startswith('.') or not series_dir.is_dir():
                    continue
                key = f"{series_dir.name}||{series_path.name}"
                if key in series:
                    # Same series name under a same-named base directory:
                    # fall back to the full base path to keep both entries.
                    key = f"{series_dir.name}||{series_path}"
                series[key] = {
                    'path': series_dir,
                    'full_path': str(series_dir),
                    'name': series_dir.name,
                    'episode_count': self._count_video_files(series_dir),
                    'base_path': str(series_path),
                }
        print(f"Found {len(series)} TV series directories in filesystem")
        return series

    def get_plex_series(self) -> Dict[str, Dict]:
        """Query Plex for all series in its first library of type ``show``.

        NOTE: only the first ``show`` section is read; series in further TV
        libraries are not seen.

        Returns:
            Mapping of series title to a dict with the keys ``title``,
            ``year``, ``episode_count``, ``path`` and ``added_at``. Returns an
            empty dict when the connection fails, authentication fails or no
            TV library exists.
        """
        print("\nConnecting to Plex...")
        try:
            plex = PlexServer(self.plex_url, self.plex_token)
            series_section = None
            # Find the TV Shows library section
            for section in plex.library.sections():
                if section.type == 'show':
                    series_section = section
                    break
            if not series_section:
                print("Error: No TV Shows library found in Plex")
                return {}
            print(f"Found Plex TV Shows library: {series_section.title}")
            plex_series = {}
            for show in series_section.all():
                # Attribute access may itself fail, so both lookups are
                # best-effort with a neutral default.
                try:
                    episode_count = show.leafCount or 0  # Total episodes
                except Exception:
                    episode_count = 0
                try:
                    locations = getattr(show, 'locations', None) or []
                except Exception:
                    locations = []
                show_path = locations[0] if locations else ''
                year = getattr(show, 'year', None)
                key = self._unique_key(plex_series, show.title, year)
                plex_series[key] = {
                    'title': show.title,
                    'year': year,
                    'episode_count': episode_count,
                    'path': show_path,
                    'added_at': getattr(show, 'addedAt', None),
                }
            print(f"Found {len(plex_series)} TV series in Plex")
            return plex_series
        except Unauthorized:
            print("Error: Plex authentication failed. Check your PLEX_TOKEN")
            return {}
        except Exception as e:
            print(f"Error connecting to Plex: {e}")
            return {}

    def _jellyfin_episode_count(self, session, series_id: str) -> int:
        """Return the number of episodes Jellyfin reports for *series_id*.

        Best-effort: any request or decoding problem yields 0.
        """
        episodes_url = f"{self.jellyfin_url}/Users/{self.jellyfin_user_id}/Items"
        episodes_params = {
            'ParentId': series_id,
            'Recursive': 'true',
            'IncludeItemTypes': 'Episode',
            # Only TotalRecordCount is needed, not the items themselves.
            'Limit': 1,
        }
        try:
            response = session.get(episodes_url, params=episodes_params, timeout=10)
            if response.status_code == 200:
                return response.json().get('TotalRecordCount', 0)
        except (RequestException, ValueError):
            # The count is informational only; fall through to 0.
            pass
        return 0

    def get_jellyfin_series(self) -> Dict[str, Dict]:
        """Query Jellyfin for all series located under the configured paths.

        Returns:
            Mapping of series title to a dict with the keys ``title``,
            ``year``, ``path``, ``id``, ``episode_count`` and
            ``date_created``. Returns an empty dict when a request fails or a
            response cannot be decoded.
        """
        print("\nConnecting to Jellyfin...")
        headers = {
            'X-Emby-Token': self.jellyfin_api_key,
            'Content-Type': 'application/json'
        }
        # Get all TV series from the library
        url = f"{self.jellyfin_url}/Users/{self.jellyfin_user_id}/Items"
        params = {
            # Series live inside library folders, so the search has to
            # descend from the user's root; same setting as the episode query.
            'Recursive': 'true',
            'IncludeItemTypes': 'Series',
            'Fields': 'Path,DateCreated',
            'Limit': 1000
        }
        all_series = {}
        start_index = 0
        try:
            # One session reuses the connection for the per-series requests.
            with requests.Session() as session:
                session.headers.update(headers)
                while True:
                    params['StartIndex'] = start_index
                    response = session.get(url, params=params, timeout=30)
                    response.raise_for_status()
                    data = response.json()
                    items = data.get('Items', [])
                    if not items:
                        break
                    for item in items:
                        path = item.get('Path', '')
                        # Filter: only series from the configured directories
                        if not path or not self._is_under_series_path(self.normalize_path(path)):
                            continue
                        series_id = item.get('Id', '')
                        title = item.get('Name', '')
                        year = item.get('ProductionYear')
                        key = self._unique_key(all_series, title, year)
                        all_series[key] = {
                            'title': title,
                            'year': year,
                            'path': path,
                            'id': series_id,
                            'episode_count': self._jellyfin_episode_count(session, series_id),
                            'date_created': item.get('DateCreated'),
                        }
                    # Check if there are more items
                    total_records = data.get('TotalRecordCount', 0)
                    start_index += len(items)
                    if start_index >= total_records:
                        break
        except (RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body.
            print(f"Error connecting to Jellyfin: {e}")
            error_response = getattr(e, 'response', None)
            if error_response is not None:
                print(f"Response: {error_response.text}")
            return {}
        print(f"Found {len(all_series)} TV series in Jellyfin")
        return all_series

    # ------------------------------------------------------------------
    # Matching
    # ------------------------------------------------------------------

    def build_jellyfin_lookup(self, jellyfin_series: Dict) -> Dict[str, Dict]:
        """Index Jellyfin series by normalized title and directory name.

        Each series is indexed under its normalized title, its normalized
        directory name and the normalized directory name without year/id
        tags. Empty keys are skipped; on a key clash the later entry wins.
        """
        lookup = {}
        for jf_data in jellyfin_series.values():
            candidates = [jf_data['title']]
            jf_path = jf_data.get('path', '')
            if jf_path:
                dirname = Path(jf_path).name
                candidates.append(dirname)
                candidates.append(self.extract_base_title(dirname))
            for candidate in candidates:
                key = self.normalize_title(candidate)
                if key:
                    lookup[key] = jf_data
        return lookup

    def find_jellyfin_match(self, fs_name: str, fs_path: str,
                            jellyfin_lookup: Dict) -> Tuple[bool, Optional[str], int]:
        """Look up a filesystem series in the Jellyfin index.

        Args:
            fs_name: Directory name of the series.
            fs_path: Full path of the series directory.
            jellyfin_lookup: Index built by ``build_jellyfin_lookup``.

        Returns:
            ``(True, jellyfin_title, jellyfin_episode_count)`` on a match by
            name or path, otherwise ``(False, None, 0)``.
        """
        # Try matching by normalized name (full and base)
        for candidate in (fs_name, self.extract_base_title(fs_name)):
            key = self.normalize_title(candidate)
            jf_data = jellyfin_lookup.get(key) if key else None
            if jf_data is not None:
                return True, jf_data.get('title', ''), jf_data.get('episode_count', 0)
        # Try matching by path
        fs_path_norm = self.normalize_path(fs_path)
        for jf_data in jellyfin_lookup.values():
            jf_path_norm = self.normalize_path(jf_data.get('path', ''))
            if self._path_contains(jf_path_norm, fs_path_norm):
                return True, jf_data.get('title', ''), jf_data.get('episode_count', 0)
        return False, None, 0

    def _diff_libraries(self, fs_series: Dict, plex_series: Dict,
                        jellyfin_series: Dict) -> Tuple[Dict, Dict, Dict]:
        """Compare already-fetched library listings; performs no I/O.

        Every entry of each source is checked individually, so series that
        share a normalized title are not dropped from the comparison.

        Returns:
            ``(missing_from_plex, missing_from_plex_jellyfin, orphaned_in_plex)``
            as described in ``compare_libraries``.
        """
        jellyfin_lookup = self.build_jellyfin_lookup(jellyfin_series)

        # Normalize each filesystem entry once:
        # (key, data, base-title key, directory-name key, normalized path)
        fs_entries = [
            (
                key,
                data,
                self.normalize_title(self.extract_base_title(data['name'])),
                self.normalize_title(data['name']),
                self.normalize_path(data['full_path']),
            )
            for key, data in fs_series.items()
        ]
        fs_titles = {entry[2] for entry in fs_entries if entry[2]}
        fs_paths = [entry[4] for entry in fs_entries if entry[4]]

        plex_titles = set()
        plex_paths = []
        for key, data in plex_series.items():
            title_norm = self.normalize_title(data.get('title', key))
            if title_norm:
                plex_titles.add(title_norm)
            path_norm = self.normalize_path(data.get('path', ''))
            if path_norm:
                plex_paths.append(path_norm)

        def overlaps_any(path_norm, others):
            return any(self._paths_overlap(path_norm, other) for other in others)

        # Find series in filesystem but not in Plex
        missing_from_plex = {}
        for key, fs_data, base_norm, _name_norm, path_norm in fs_entries:
            if base_norm and base_norm in plex_titles:
                continue
            # Double-check by path - maybe the title doesn't match but path does
            if overlaps_any(path_norm, plex_paths):
                continue
            in_jellyfin, jf_title, jf_ep_count = self.find_jellyfin_match(
                fs_data['name'], fs_data['full_path'], jellyfin_lookup)
            missing_from_plex[key] = {
                'path': fs_data['full_path'],
                'episode_count': fs_data['episode_count'],
                'in_jellyfin': in_jellyfin,
                'jellyfin_title': jf_title,
                'jellyfin_episode_count': jf_ep_count
            }

        # Find series in Jellyfin but not in Plex
        missing_from_plex_jellyfin = {}
        for key, data in jellyfin_series.items():
            title_norm = self.normalize_title(data['title'])
            if title_norm and title_norm in plex_titles:
                continue
            jf_path = data.get('path', '')
            jf_path_norm = self.normalize_path(jf_path)
            # Same path fallback as for filesystem series.
            if overlaps_any(jf_path_norm, plex_paths):
                continue
            in_filesystem = False
            fs_path = None
            fs_ep_count = 0
            if jf_path:
                dirname_norm = self.normalize_title(Path(jf_path).name)
                for _key, fs_data, _base_norm, name_norm, fs_path_norm in fs_entries:
                    # Match by path, directory name, or title
                    if (jf_path_norm == fs_path_norm or
                            (name_norm and name_norm in (dirname_norm, title_norm))):
                        in_filesystem = True
                        fs_path = fs_data['full_path']
                        fs_ep_count = fs_data['episode_count']
                        break
            missing_from_plex_jellyfin[key] = {
                'title': data['title'],
                'path': jf_path,
                'episode_count': data['episode_count'],
                'in_filesystem': in_filesystem,
                'filesystem_path': fs_path,
                'filesystem_episode_count': fs_ep_count
            }

        # Find series in Plex but not in filesystem (orphaned)
        orphaned_in_plex = {}
        for key, data in plex_series.items():
            title_norm = self.normalize_title(data.get('title', key))
            if title_norm and title_norm in fs_titles:
                continue
            # Double-check by looking at the actual path if available
            if overlaps_any(self.normalize_path(data.get('path', '')), fs_paths):
                continue
            orphaned_in_plex[key] = data

        return missing_from_plex, missing_from_plex_jellyfin, orphaned_in_plex

    def compare_libraries(self) -> Tuple[Dict, Dict, Dict]:
        """Fetch all three sources and return their discrepancies.

        Returns:
            A tuple ``(missing_from_plex, missing_from_plex_jellyfin,
            orphaned_in_plex)``:

            * ``missing_from_plex``: filesystem series not found in Plex,
              keyed like ``get_filesystem_series``.
            * ``missing_from_plex_jellyfin``: Jellyfin series not found in
              Plex, keyed like ``get_jellyfin_series``.
            * ``orphaned_in_plex``: Plex series not found on disk, keyed like
              ``get_plex_series``.
        """
        fs_series = self.get_filesystem_series()
        plex_series = self.get_plex_series()
        jellyfin_series = self.get_jellyfin_series()
        return self._diff_libraries(fs_series, plex_series, jellyfin_series)

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------

    def generate_report(self):
        """Print the comparison report and save it as JSON.

        The detailed data is written to ``series_comparison_report.json`` in
        the current working directory.
        """
        print("\n" + "="*80)
        print("TV SERIES COMPARISON REPORT")
        print("="*80)
        missing_from_plex, missing_from_plex_jellyfin, orphaned_in_plex = self.compare_libraries()
        print("\n📊 SUMMARY:")
        print(f" Series missing from Plex (found in filesystem): {len(missing_from_plex)}")
        print(f" Series missing from Plex (found in Jellyfin): {len(missing_from_plex_jellyfin)}")
        print(f" Series in Plex but not in filesystem: {len(orphaned_in_plex)}")
        if missing_from_plex:
            print(f"\n❌ TV SERIES IN FILESYSTEM BUT MISSING FROM PLEX ({len(missing_from_plex)}):")
            print("-" * 80)
            for i, (title, info) in enumerate(sorted(missing_from_plex.items()), 1):
                print(f"\n{i}. {title}")
                print(f" Path: {info['path']}")
                print(f" Episodes: {info['episode_count']}")
                print(f" In Jellyfin: {'✓ Yes' if info['in_jellyfin'] else '✗ No'}")
                if info['jellyfin_title']:
                    print(f" Jellyfin Title: {info['jellyfin_title']}")
                    print(f" Jellyfin Episodes: {info['jellyfin_episode_count']}")
        # Skip the Jellyfin section only when every entry is already listed
        # in the filesystem section above (compared by path, not by count).
        listed_paths = {info['path'] for info in missing_from_plex.values()}
        jellyfin_adds_entries = any(
            info['filesystem_path'] not in listed_paths
            for info in missing_from_plex_jellyfin.values()
        )
        if jellyfin_adds_entries:
            print(f"\n❌ TV SERIES IN JELLYFIN BUT MISSING FROM PLEX ({len(missing_from_plex_jellyfin)}):")
            print("-" * 80)
            for i, (key, info) in enumerate(sorted(missing_from_plex_jellyfin.items()), 1):
                print(f"\n{i}. {info['title']}")
                print(f" Path: {info['path']}")
                print(f" Episodes: {info['episode_count']}")
                print(f" In Filesystem: {'✓ Yes' if info['in_filesystem'] else '✗ No'}")
                if info['filesystem_path']:
                    print(f" Filesystem Path: {info['filesystem_path']}")
                    print(f" Filesystem Episodes: {info['filesystem_episode_count']}")
        if orphaned_in_plex:
            print(f"\n⚠️ TV SERIES IN PLEX BUT NOT IN FILESYSTEM ({len(orphaned_in_plex)}):")
            print("-" * 80)
            for i, (title, info) in enumerate(sorted(orphaned_in_plex.items()), 1):
                print(f"\n{i}. {title}")
                if info.get('path'):
                    print(f" Path: {info['path']}")
                print(f" Episodes: {info.get('episode_count', 0)}")
        # Save detailed report to JSON
        report_data = {
            'missing_from_plex': missing_from_plex,
            'missing_from_plex_jellyfin': missing_from_plex_jellyfin,
            'orphaned_in_plex': orphaned_in_plex
        }
        report_file = Path('series_comparison_report.json')
        with open(report_file, 'w', encoding='utf-8') as f:
            # default=str serializes values such as Plex's `added_at`.
            json.dump(report_data, f, indent=2, default=str)
        print(f"\n💾 Detailed report saved to: {report_file}")
        print("="*80)
def main():
    """Load configuration from the environment and run the series comparison.

    Reads a ``.env`` file via ``load_dotenv()`` and then these variables:

    * ``PLEX_URL`` (default ``http://localhost:32400``) and ``PLEX_TOKEN``
    * ``JELLYFIN_URL`` (default ``http://localhost:8096``),
      ``JELLYFIN_API_KEY`` and ``JELLYFIN_USER_ID``
    * ``SERIES_PATH`` (or ``TV_PATH``) plus optional ``SERIES_PATH_2`` ..
      ``SERIES_PATH_9``

    Exits with status 1 when no series path is configured or a required
    credential is missing.
    """
    load_dotenv()
    # Get configuration from environment variables
    plex_url = os.getenv('PLEX_URL', 'http://localhost:32400')
    plex_token = os.getenv('PLEX_TOKEN')
    jellyfin_url = os.getenv('JELLYFIN_URL', 'http://localhost:8096')
    jellyfin_api_key = os.getenv('JELLYFIN_API_KEY')
    jellyfin_user_id = os.getenv('JELLYFIN_USER_ID')

    # Support multiple series paths. `or` (instead of a getenv default) lets
    # TV_PATH apply when SERIES_PATH is present but empty, e.g. "SERIES_PATH="
    # left over from an example config.
    primary_path = os.getenv('SERIES_PATH') or os.getenv('TV_PATH')
    # Additional paths: SERIES_PATH_2, SERIES_PATH_3, ... SERIES_PATH_9
    candidates = [primary_path] + [os.getenv(f'SERIES_PATH_{i}') for i in range(2, 10)]
    series_paths = [path for path in candidates if path]
    if not series_paths:
        print("Error: No SERIES_PATH or TV_PATH set in environment or .env file")
        sys.exit(1)

    # Validate required configuration
    required_settings = (
        ('PLEX_TOKEN', plex_token),
        ('JELLYFIN_API_KEY', jellyfin_api_key),
        ('JELLYFIN_USER_ID', jellyfin_user_id),
    )
    for name, value in required_settings:
        if not value:
            print(f"Error: {name} not set in environment or .env file")
            sys.exit(1)

    print(f"Configured series paths: {series_paths}")
    comparator = SeriesLibraryComparator(
        plex_url=plex_url,
        plex_token=plex_token,
        jellyfin_url=jellyfin_url,
        jellyfin_api_key=jellyfin_api_key,
        jellyfin_user_id=jellyfin_user_id,
        series_paths=series_paths
    )
    comparator.generate_report()
# Run the comparison only when executed as a script, not when imported.
if __name__ == '__main__':
    main()