Files
media-map/backend/app/services/sync.py
Danilo Reyes 335a53ee62 Add country metadata extraction and assignment features
- Updated `README.md` to include instructions for setting up the TMDB API key and new admin endpoints for managing country metadata.
- Implemented `/admin/missing-countries` endpoint to list media items without country metadata, with filtering options for source and media type.
- Added `/admin/assign-country` endpoint to manually assign a country code to a media item.
- Enhanced country extraction logic in `sync.py` to utilize TMDB and MusicBrainz APIs for automatic country retrieval based on available metadata.
- Updated configuration in `config.py` to include optional TMDB API key setting.
- Improved error handling and logging for country extraction failures.
- Ensured that country data is stored and utilized during media item synchronization across Radarr, Sonarr, and Lidarr.
2025-12-28 21:47:03 -06:00

416 lines
16 KiB
Python

"""Sync service for *arr instances"""
import httpx
import logging
from typing import Dict, List, Optional
from app.core.config import settings
import json
import os
# Module-level logger used by every helper in this file.
# (The original bound `logger` twice; one binding is enough.)
logger = logging.getLogger(__name__)

# TMDB API configuration: root of the v3 REST API used by the
# get_tmdb_*_country() lookups below.
TMDB_BASE_URL = "https://api.themoviedb.org/3"
async def fetch_radarr_movies() -> List[Dict]:
    """Fetch all movies from Radarr.

    Sends a GET to ``{settings.radarr_url}/api/v3/movie`` with the configured
    key in the ``X-Api-Key`` header. When no API key is configured a warning
    is logged; when the request, the status check or the JSON decoding
    raises, an error is logged. Both cases return an empty list.
    """
    if not settings.radarr_api_key:
        logger.warning("Radarr API key not configured")
        return []

    async with httpx.AsyncClient() as client:
        try:
            endpoint = f"{settings.radarr_url}/api/v3/movie"
            auth_headers = {"X-Api-Key": settings.radarr_api_key}
            response = await client.get(endpoint, headers=auth_headers, timeout=30.0)
            response.raise_for_status()
            movies = response.json()
        except Exception as e:
            logger.error(f"Failed to fetch Radarr movies: {e}")
            movies = []
    return movies
async def fetch_sonarr_series() -> List[Dict]:
    """Fetch all series from Sonarr.

    Sends a GET to ``{settings.sonarr_url}/api/v3/series`` with the configured
    key in the ``X-Api-Key`` header. When no API key is configured a warning
    is logged; when the request, the status check or the JSON decoding
    raises, an error is logged. Both cases return an empty list.
    """
    if not settings.sonarr_api_key:
        logger.warning("Sonarr API key not configured")
        return []

    async with httpx.AsyncClient() as client:
        try:
            endpoint = f"{settings.sonarr_url}/api/v3/series"
            auth_headers = {"X-Api-Key": settings.sonarr_api_key}
            response = await client.get(endpoint, headers=auth_headers, timeout=30.0)
            response.raise_for_status()
            series = response.json()
        except Exception as e:
            logger.error(f"Failed to fetch Sonarr series: {e}")
            series = []
    return series
async def fetch_lidarr_artists() -> List[Dict]:
    """Fetch all artists from Lidarr.

    Sends a GET to ``{settings.lidarr_url}/api/v1/artist`` with the configured
    key in the ``X-Api-Key`` header. When no API key is configured a warning
    is logged; when the request, the status check or the JSON decoding
    raises, an error is logged. Both cases return an empty list.
    """
    if not settings.lidarr_api_key:
        logger.warning("Lidarr API key not configured")
        return []

    async with httpx.AsyncClient() as client:
        try:
            endpoint = f"{settings.lidarr_url}/api/v1/artist"
            auth_headers = {"X-Api-Key": settings.lidarr_api_key}
            response = await client.get(endpoint, headers=auth_headers, timeout=30.0)
            response.raise_for_status()
            artists = response.json()
        except Exception as e:
            logger.error(f"Failed to fetch Lidarr artists: {e}")
            artists = []
    return artists
async def get_tmdb_movie_country(tmdb_id: int) -> Optional[str]:
    """Look up a movie on TMDB and return its first production country.

    Returns the upper-cased ``iso_3166_1`` value of the first entry in the
    response's ``production_countries`` list. Returns ``None`` when no TMDB
    API key is configured, when the response status is not 200, when the
    expected structure is absent, or when anything raises (the exception is
    logged at debug level).
    """
    if not settings.tmdb_api_key:
        return None

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{TMDB_BASE_URL}/movie/{tmdb_id}",
                params={"api_key": settings.tmdb_api_key},
                timeout=5.0,
            )
            if response.status_code != 200:
                return None

            payload = response.json()
            # production_countries is expected to be a list of objects
            # carrying an iso_3166_1 key; only the first entry is consulted.
            entries = (
                payload["production_countries"]
                if "production_countries" in payload
                else None
            )
            if not (isinstance(entries, list) and entries):
                return None

            first = entries[0]
            if isinstance(first, dict) and "iso_3166_1" in first:
                return first["iso_3166_1"].upper()
    except Exception as e:
        logger.debug(f"Failed to fetch TMDB data for movie {tmdb_id}: {e}")
    return None
def extract_country_from_radarr(movie: Dict) -> Optional[str]:
    """Extract a country code from Radarr movie metadata (synchronous check only).

    Two places are consulted, in order:

    1. ``movie["productionCountries"]``
    2. ``movie["movieMetadata"]["productionCountries"]`` (only when the movie
       has a ``tmdbId`` key, as before)

    For each, only the first list entry is inspected, and it must be a dict
    with a non-empty string under ``iso_3166_1``. Entries that are bare
    strings (country names) cannot be mapped without a lookup table, so they
    are skipped; unlike the previous version, skipping one no longer prevents
    the ``movieMetadata`` fallback from being tried.

    Args:
        movie: A Radarr movie resource as a dict.

    Returns:
        The upper-cased country code, or ``None`` when none is present.
        The asynchronous TMDB lookup is the caller's responsibility
        (see ``sync_radarr``).
    """
    candidates = [movie.get("productionCountries")]

    if "tmdbId" in movie:
        # movieMetadata may be present but null; guard before reading from it.
        metadata = movie.get("movieMetadata")
        if isinstance(metadata, dict):
            candidates.append(metadata.get("productionCountries"))

    for countries in candidates:
        if not (isinstance(countries, list) and countries):
            continue
        first = countries[0]
        if not isinstance(first, dict):
            continue
        code = first.get("iso_3166_1")
        # Non-string or empty codes are ignored rather than raising on .upper().
        if isinstance(code, str) and code:
            return code.upper()

    return None
async def get_tmdb_tv_country(tmdb_id: int) -> Optional[str]:
    """Look up a TV series on TMDB and return its first origin country.

    Returns the upper-cased first element of the response's
    ``origin_country`` list. Returns ``None`` when no TMDB API key is
    configured, when the response status is not 200, when the list is
    missing or empty, or when anything raises (the exception is logged at
    debug level).
    """
    if not settings.tmdb_api_key:
        return None

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{TMDB_BASE_URL}/tv/{tmdb_id}",
                params={"api_key": settings.tmdb_api_key},
                timeout=5.0,
            )
            if response.status_code != 200:
                return None

            payload = response.json()
            # origin_country is expected to be a list of ISO 3166-1 codes.
            origin = payload["origin_country"] if "origin_country" in payload else None
            if isinstance(origin, list) and origin:
                return origin[0].upper()
    except Exception as e:
        logger.debug(f"Failed to fetch TMDB data for TV {tmdb_id}: {e}")
    return None
def extract_country_from_sonarr(series: Dict) -> Optional[str]:
    """Read a two-letter country code out of a Sonarr series dict.

    Only ``series["seriesMetadata"]["originCountry"]`` is consulted. It may
    be a list (the first element is used) or a plain string; in both cases
    the value must be exactly two characters long and is returned
    upper-cased. Anything else yields ``None``. The TMDB lookup is
    asynchronous and is performed by ``sync_sonarr()``.
    """
    metadata = series.get("seriesMetadata")
    if not metadata:
        return None

    if "originCountry" not in metadata:
        return None
    origin = metadata["originCountry"]
    if not origin:
        return None

    if isinstance(origin, list):
        first = origin[0]
        if len(first) != 2:
            return None
        code = first.upper()
        return code if code else None

    if isinstance(origin, str) and len(origin) == 2:
        return origin.upper()
    return None
def _first_iso_code(area) -> Optional[str]:
    """Return the first ``iso-3166-1-codes`` entry of a MusicBrainz area, upper-cased."""
    if not isinstance(area, dict):
        return None
    codes = area.get("iso-3166-1-codes")
    if isinstance(codes, list) and codes and isinstance(codes[0], str):
        return codes[0].upper()
    return None


async def get_musicbrainz_artist_country(mbid: str) -> Optional[str]:
    """Get a country code from the MusicBrainz API for an artist.

    The artist is looked up with ``inc=area-rels`` and the country is taken
    from the first of these that yields a code:

    1. a relation of type ``"origin"`` whose area lists ISO 3166-1 codes
       (the only source consulted previously);
    2. the artist's top-level ``country`` field, when it is a two-letter
       string;
    3. the artist's top-level ``area`` object, when it lists ISO 3166-1
       codes.

    Sources 2 and 3 are fallbacks only, so any lookup that already resolved
    through an "origin" relation returns the same code as before.

    Args:
        mbid: MusicBrainz artist ID.

    Returns:
        The upper-cased country code, or ``None`` when the response status is
        not 200, no code is present, or anything raises. Failures are logged
        at debug level and never propagate.
    """
    try:
        async with httpx.AsyncClient() as client:
            # MusicBrainz API doesn't require an API key
            response = await client.get(
                f"https://musicbrainz.org/ws/2/artist/{mbid}",
                params={"fmt": "json", "inc": "area-rels"},
                headers={"User-Agent": "MovieMap/1.0 (https://github.com/yourusername/movie-map)"},
                timeout=5.0,
            )
            if response.status_code != 200:
                # Previously ignored without a trace; log it so that rejected
                # or throttled lookups can be diagnosed.
                logger.debug(
                    f"MusicBrainz returned HTTP {response.status_code} for artist {mbid}"
                )
                return None

            data = response.json()
            if not isinstance(data, dict):
                return None

            # 1. Area relations of type "origin".
            for relation in data.get("relations") or []:
                if isinstance(relation, dict) and relation.get("type") == "origin":
                    code = _first_iso_code(relation.get("area"))
                    if code:
                        return code

            # 2. The artist's own country field.
            country = data.get("country")
            if isinstance(country, str) and len(country) == 2:
                return country.upper()

            # 3. The artist's main area, when it is a country-level area.
            return _first_iso_code(data.get("area"))
    except Exception as e:
        logger.debug(f"Failed to fetch MusicBrainz data for artist {mbid}: {e}")
    return None
def extract_country_from_lidarr(artist: Dict) -> Optional[str]:
    """Read a two-letter country code out of a Lidarr artist dict.

    Only the top-level ``country`` field is consulted; it must be a string
    of exactly two characters and is returned upper-cased. Anything else
    yields ``None``. The MusicBrainz lookup is asynchronous and is performed
    by ``sync_lidarr()``.
    """
    value = artist.get("country")
    looks_like_code = isinstance(value, str) and len(value) == 2
    return value.upper() if looks_like_code else None
async def upsert_media_item(source_kind: str, source_item_id: int, title: str,
                            year: Optional[int], media_type: str, arr_raw: Dict):
    """Upsert a media item and its country association into the database.

    The row in ``moviemap.media_item`` is inserted, or updated on a
    ``(source_kind, source_item_id)`` conflict (title, year and arr_raw are
    refreshed). A country code is then extracted from ``arr_raw`` with the
    extractor matching ``source_kind``.

    The ``moviemap.media_country`` rows of the item are replaced only when a
    country code was extracted. When extraction finds nothing, existing rows
    are left alone: the remote lookup helpers in this module return ``None``
    on any error, so deleting unconditionally would erase a previously stored
    country whenever a lookup fails.

    Args:
        source_kind: ``"radarr"``, ``"sonarr"`` or ``"lidarr"``; any other
            value skips country extraction.
        source_item_id: The item's id in the source application.
        title: Display title.
        year: Release year, or ``None``.
        media_type: Stored as-is on insert (not changed on conflict).
        arr_raw: Raw item dict; serialized to JSON for the ``arr_raw`` column.

    Returns:
        The ``id`` of the inserted or updated ``media_item`` row.

    Raises:
        Exception: "Database not available" when no pool exists after
            ``init_db()``.
    """
    # Import the module rather than the `pool` name: `from ... import pool`
    # copies the value as it is *before* init_db() runs, so a pool created
    # (rebound) by init_db() would not be seen on this call.
    # NOTE(review): assumes init_db() publishes the pool as
    # `app.core.database.pool` - confirm against that module.
    from app.core import database

    await database.init_db()
    db_pool = database.pool
    if db_pool is None:
        raise Exception("Database not available")

    # Pick the extractor for this source; unknown sources yield no country.
    extractors = {
        "radarr": extract_country_from_radarr,
        "sonarr": extract_country_from_sonarr,
        "lidarr": extract_country_from_lidarr,
    }
    extractor = extractors.get(source_kind)
    country_code = extractor(arr_raw) if extractor is not None else None

    async with db_pool.connection() as conn:
        async with conn.cursor() as cur:
            # Upsert media item
            query = """
                INSERT INTO moviemap.media_item
                    (source_kind, source_item_id, title, year, media_type, arr_raw)
                VALUES (%s, %s, %s, %s, %s, %s::jsonb)
                ON CONFLICT (source_kind, source_item_id)
                DO UPDATE SET
                    title = EXCLUDED.title,
                    year = EXCLUDED.year,
                    arr_raw = EXCLUDED.arr_raw
                RETURNING id
            """
            await cur.execute(
                query,
                (source_kind, source_item_id, title, year, media_type, json.dumps(arr_raw)),
            )
            result = await cur.fetchone()
            media_item_id = result[0]

            if country_code:
                # Replace the item's country associations with the new one.
                await cur.execute(
                    "DELETE FROM moviemap.media_country WHERE media_item_id = %s",
                    (media_item_id,),
                )
                await cur.execute(
                    "INSERT INTO moviemap.media_country (media_item_id, country_code) VALUES (%s, %s)",
                    (media_item_id, country_code),
                )
            else:
                # Log when country extraction fails for debugging
                logger.debug(f"Could not extract country for {source_kind} item {source_item_id}: {title}")
        await conn.commit()
    return media_item_id
async def sync_radarr():
    """Sync movies from Radarr into the database.

    For each movie returned by ``fetch_radarr_movies()``:

    1. Try to read a country from the Radarr data itself.
    2. If that fails and the movie has a truthy ``tmdbId``, ask TMDB.
    3. Store a TMDB result as the *first* entry of
       ``movie["productionCountries"]``, then upsert the movie.

    A failure on one movie is logged and does not stop the loop.

    Returns:
        ``{"radarr": n}`` where ``n`` is the number of movies upserted
        without error.
    """
    movies = await fetch_radarr_movies()
    synced = 0
    for movie in movies:
        try:
            # Try to get country from TMDB if tmdbId is available and no country in Radarr data
            country_code = extract_country_from_radarr(movie)
            tmdb_id = movie.get("tmdbId")
            if not country_code and tmdb_id:
                country_code = await get_tmdb_movie_country(tmdb_id)
                if country_code:
                    # upsert_media_item() re-runs extract_country_from_radarr()
                    # on arr_raw, and that extractor only looks at the first
                    # list entry. The TMDB result therefore has to go first:
                    # appending it would be ignored when an unusable entry is
                    # already there, and would crash when the key holds null.
                    existing = movie.get("productionCountries")
                    if not isinstance(existing, list):
                        existing = []
                    movie["productionCountries"] = [{"iso_3166_1": country_code}, *existing]

            # Upsert media item (will extract country from the data we just prepared)
            await upsert_media_item(
                source_kind="radarr",
                source_item_id=movie.get("id"),
                title=movie.get("title", "Unknown"),
                year=movie.get("year"),
                media_type="movie",
                arr_raw=movie,
            )
            synced += 1
        except Exception as e:
            logger.error(f"Failed to sync movie {movie.get('id')}: {e}")
    return {"radarr": synced}
async def sync_sonarr():
    """Sync series from Sonarr into the database.

    For each series returned by ``fetch_sonarr_series()``:

    1. Try to read a country from the Sonarr data itself.
    2. If that fails and the series has a truthy ``tmdbId``, ask TMDB.
    3. Store a TMDB result under ``s["seriesMetadata"]["originCountry"]``,
       then upsert the series.

    A failure on one series is logged and does not stop the loop.

    Returns:
        ``{"sonarr": n}`` where ``n`` is the number of series upserted
        without error.
    """
    series = await fetch_sonarr_series()
    synced = 0
    for s in series:
        try:
            # Try to get country from TMDB if tmdbId is available and no country in Sonarr data
            country_code = extract_country_from_sonarr(s)
            tmdb_id = s.get("tmdbId")
            if not country_code and tmdb_id:
                country_code = await get_tmdb_tv_country(tmdb_id)
                if country_code:
                    # Store the TMDB country where extract_country_from_sonarr()
                    # (re-run by upsert_media_item) will find it. The key may be
                    # present but null or not a dict, so a plain "not in" check
                    # is not enough to make the item assignment below safe.
                    metadata = s.get("seriesMetadata")
                    if not isinstance(metadata, dict):
                        metadata = {}
                        s["seriesMetadata"] = metadata
                    metadata["originCountry"] = [country_code]

            # Upsert media item (will extract country from the data we just prepared)
            await upsert_media_item(
                source_kind="sonarr",
                source_item_id=s.get("id"),
                title=s.get("title", "Unknown"),
                year=s.get("year"),
                media_type="show",
                arr_raw=s,
            )
            synced += 1
        except Exception as e:
            logger.error(f"Failed to sync series {s.get('id')}: {e}")
    return {"sonarr": synced}
async def sync_lidarr():
    """Sync artists from Lidarr into the database.

    Each artist from ``fetch_lidarr_artists()`` is upserted with media type
    ``"music"`` and no year. When the Lidarr data carries no usable country
    and the artist has a ``foreignArtistId`` (the MusicBrainz ID), the
    country is looked up on MusicBrainz and written to ``artist["country"]``
    before the upsert. A failure on one artist is logged and the loop
    continues.

    Returns ``{"lidarr": n}`` with the number of artists upserted without
    error.
    """
    synced = 0
    for artist in await fetch_lidarr_artists():
        try:
            country_code = extract_country_from_lidarr(artist)

            # Only consult MusicBrainz when Lidarr itself gave us nothing.
            mbid = None if country_code else artist.get("foreignArtistId")
            if mbid:
                country_code = await get_musicbrainz_artist_country(mbid)
                if country_code:
                    # upsert_media_item() reads the country back from arr_raw.
                    artist["country"] = country_code

            await upsert_media_item(
                source_kind="lidarr",
                source_item_id=artist.get("id"),
                title=artist.get("artistName", "Unknown"),
                year=None,  # Artists don't have a year
                media_type="music",
                arr_raw=artist,
            )
        except Exception as e:
            logger.error(f"Failed to sync artist {artist.get('id')}: {e}")
        else:
            synced += 1
    return {"lidarr": synced}
async def sync_all_arrs() -> Dict:
    """Sync from all *arr instances and record the sync time.

    Runs the Radarr, Sonarr and Lidarr syncs one after another. A sync that
    raises is logged and counted as 0 so that the others still run.
    Afterwards every source row in ``moviemap.source`` is inserted, or has
    its ``last_sync_at`` set to ``NOW()`` on a ``kind`` conflict. This
    happens for all three kinds, regardless of how each sync went.

    Returns:
        A dict with the keys ``"radarr"``, ``"sonarr"`` and ``"lidarr"``
        mapping to the number of items synced for each.

    Raises:
        Exception: "Database not available" when no pool exists after
            ``init_db()``.
    """
    logger.info("Starting sync from all *arr instances")
    results = {}

    # Sync each service; (result key, label used in the log message, coroutine).
    services = (
        ("radarr", "Radarr", sync_radarr),
        ("sonarr", "Sonarr", sync_sonarr),
        ("lidarr", "Lidarr", sync_lidarr),
    )
    for kind, label, run_sync in services:
        try:
            results.update(await run_sync())
        except Exception as e:
            logger.error(f"{label} sync failed: {e}")
            results[kind] = 0

    # Update last sync time.
    # Import the module rather than the `pool` name: `from ... import pool`
    # copies the value as it is *before* init_db() runs, so a pool created
    # (rebound) by init_db() would not be seen on this call.
    # NOTE(review): assumes init_db() publishes the pool as
    # `app.core.database.pool` - confirm against that module.
    from app.core import database

    await database.init_db()
    db_pool = database.pool
    if db_pool is None:
        raise Exception("Database not available")

    async with db_pool.connection() as conn:
        async with conn.cursor() as cur:
            for source_kind in ("radarr", "sonarr", "lidarr"):
                await cur.execute(
                    """
                    INSERT INTO moviemap.source (kind, base_url, enabled, last_sync_at)
                    VALUES (%s, %s, %s, NOW())
                    ON CONFLICT (kind) DO UPDATE SET last_sync_at = NOW()
                    """,
                    (source_kind, getattr(settings, f"{source_kind}_url"), True),
                )
        await conn.commit()

    logger.info(f"Sync completed: {results}")
    return results