Add country metadata extraction and assignment features

- Updated `README.md` to include instructions for setting up the TMDB API key and new admin endpoints for managing country metadata.
- Implemented `/admin/missing-countries` endpoint to list media items without country metadata, with filtering options for source and media type.
- Added `/admin/assign-country` endpoint to manually assign a country code to a media item.
- Enhanced country extraction logic in `sync.py` to utilize TMDB and MusicBrainz APIs for automatic country retrieval based on available metadata.
- Updated configuration in `config.py` to include optional TMDB API key setting.
- Added debug-level logging for failed TMDB/MusicBrainz lookups and for media items whose country cannot be extracted.
- Ensured that country data is stored and utilized during media item synchronization across Radarr, Sonarr, and Lidarr.
This commit is contained in:
Danilo Reyes
2025-12-28 21:47:03 -06:00
parent 6cffbef8c6
commit 335a53ee62
4 changed files with 337 additions and 22 deletions

View File

@@ -4,6 +4,12 @@ import logging
from typing import Dict, List, Optional
from app.core.config import settings
import json
import os
logger = logging.getLogger(__name__)
# TMDB API configuration
TMDB_BASE_URL = "https://api.themoviedb.org/3"
logger = logging.getLogger(__name__)
@@ -68,8 +74,35 @@ async def fetch_lidarr_artists() -> List[Dict]:
return []
async def get_tmdb_movie_country(tmdb_id: int) -> Optional[str]:
    """Get country code from TMDB API for a movie.

    Args:
        tmdb_id: TMDB identifier of the movie.

    Returns:
        The upper-cased ``iso_3166_1`` value of the first entry in the
        response's ``production_countries`` list, or ``None`` when no TMDB
        API key is configured, the request fails, the response status is
        not 200, or the payload carries no usable country entry.
    """
    # The lookup is optional: without an API key it is skipped entirely.
    if not settings.tmdb_api_key:
        return None

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{TMDB_BASE_URL}/movie/{tmdb_id}",
                params={"api_key": settings.tmdb_api_key},
                timeout=5.0,
            )

        if response.status_code != 200:
            # Surface rejected requests (bad key, unknown id, rate limit)
            # instead of dropping them without a trace.
            logger.debug(
                f"TMDB returned HTTP {response.status_code} for movie {tmdb_id}"
            )
            return None

        data = response.json()
        if not isinstance(data, dict):
            return None

        # production_countries is a list of objects with iso_3166_1.
        countries = data.get("production_countries")
        if isinstance(countries, list) and countries:
            country = countries[0]
            if isinstance(country, dict):
                code = country.get("iso_3166_1")
                if isinstance(code, str):
                    return code.upper()
    except Exception as e:
        # Best-effort enrichment: a failed lookup must not abort the sync.
        logger.debug(f"Failed to fetch TMDB data for movie {tmdb_id}: {e}")

    return None
def extract_country_from_radarr(movie: Dict) -> Optional[str]:
"""Extract country code from Radarr movie metadata"""
"""Extract country code from Radarr movie metadata (synchronous check only)"""
# Try productionCountries first
if "productionCountries" in movie and movie["productionCountries"]:
countries = movie["productionCountries"]
@@ -89,40 +122,90 @@ def extract_country_from_radarr(movie: Dict) -> Optional[str]:
if isinstance(country, dict) and "iso_3166_1" in country:
return country["iso_3166_1"].upper()
# Note: TMDB lookup must be done asynchronously in sync_radarr()
return None
async def get_tmdb_tv_country(tmdb_id: int) -> Optional[str]:
    """Get country code from TMDB API for a TV series.

    Args:
        tmdb_id: TMDB identifier of the TV series.

    Returns:
        The upper-cased first element of the response's ``origin_country``
        list, or ``None`` when no TMDB API key is configured, the request
        fails, the response status is not 200, or the payload carries no
        usable country entry.
    """
    # The lookup is optional: without an API key it is skipped entirely.
    if not settings.tmdb_api_key:
        return None

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{TMDB_BASE_URL}/tv/{tmdb_id}",
                params={"api_key": settings.tmdb_api_key},
                timeout=5.0,
            )

        if response.status_code != 200:
            # Surface rejected requests (bad key, unknown id, rate limit)
            # instead of dropping them without a trace.
            logger.debug(
                f"TMDB returned HTTP {response.status_code} for TV {tmdb_id}"
            )
            return None

        data = response.json()
        if not isinstance(data, dict):
            return None

        # origin_country is a list of ISO 3166-1 codes.
        countries = data.get("origin_country")
        if isinstance(countries, list) and countries:
            code = countries[0]
            # Guard against a non-string element rather than letting
            # .upper() raise and be misreported as a fetch failure.
            if isinstance(code, str):
                return code.upper()
    except Exception as e:
        # Best-effort enrichment: a failed lookup must not abort the sync.
        logger.debug(f"Failed to fetch TMDB data for TV {tmdb_id}: {e}")

    return None
def extract_country_from_sonarr(series: Dict) -> Optional[str]:
    """Extract country code from Sonarr series metadata (synchronous check only).

    Only ``series["seriesMetadata"]["originCountry"]`` is inspected. The value
    may be a list (its first element is used) or a single string; in both
    cases it is accepted only when it is exactly two characters long.

    Args:
        series: Series dictionary as returned by Sonarr.

    Returns:
        The upper-cased two-letter code, or ``None`` when the metadata is
        missing, empty, or not a two-character string.
    """
    metadata = series.get("seriesMetadata")
    if not isinstance(metadata, dict):
        return None

    # originCountry might be a list or a string; for a list only the first
    # entry is considered.
    origin = metadata.get("originCountry")
    if isinstance(origin, list) and origin:
        origin = origin[0]

    if isinstance(origin, str) and len(origin) == 2:
        return origin.upper()

    # Note: TMDB lookup must be done asynchronously in sync_sonarr()
    return None
def _musicbrainz_area_country(area) -> Optional[str]:
    """Return the first ISO 3166-1 code listed on a MusicBrainz area object, if any."""
    if not isinstance(area, dict):
        return None
    codes = area.get("iso-3166-1-codes")
    if isinstance(codes, list) and codes and isinstance(codes[0], str):
        return codes[0].upper()
    return None


async def get_musicbrainz_artist_country(mbid: str) -> Optional[str]:
    """Get country code from MusicBrainz API for an artist.

    The artist document is checked in this order:

    1. its top-level ``country`` field (two-character string),
    2. the ISO 3166-1 codes of its ``area`` object,
    3. the ISO 3166-1 codes of the area in any relation of type ``origin``.

    Args:
        mbid: MusicBrainz identifier of the artist.

    Returns:
        The upper-cased country code, or ``None`` when the request fails,
        the response status is not 200, or no code is found.
    """
    try:
        async with httpx.AsyncClient() as client:
            # MusicBrainz API doesn't require an API key
            response = await client.get(
                f"https://musicbrainz.org/ws/2/artist/{mbid}",
                params={"fmt": "json", "inc": "area-rels"},
                headers={"User-Agent": "MovieMap/1.0 (https://github.com/yourusername/movie-map)"},
                timeout=5.0,
            )

        if response.status_code != 200:
            # Surface rejected requests (unknown MBID, rate limit) instead
            # of dropping them without a trace.
            logger.debug(
                f"MusicBrainz returned HTTP {response.status_code} for artist {mbid}"
            )
            return None

        data = response.json()
        if not isinstance(data, dict):
            return None

        # The artist document itself carries the country; prefer it over
        # relation data. NOTE(review): field names follow the MusicBrainz
        # JSON web service docs -- confirm against a live response.
        country = data.get("country")
        if isinstance(country, str) and len(country) == 2:
            return country.upper()

        code = _musicbrainz_area_country(data.get("area"))
        if code:
            return code

        # Fall back to area relations of type "origin".
        for relation in data.get("relations") or []:
            if isinstance(relation, dict) and relation.get("type") == "origin":
                code = _musicbrainz_area_country(relation.get("area"))
                if code:
                    return code
    except Exception as e:
        # Best-effort enrichment: a failed lookup must not abort the sync.
        logger.debug(f"Failed to fetch MusicBrainz data for artist {mbid}: {e}")

    return None
def extract_country_from_lidarr(artist: Dict) -> Optional[str]:
    """Extract country code from Lidarr artist metadata (synchronous check only).

    Only the top-level ``country`` field is inspected, and it is accepted
    only when it is a two-character string.

    Args:
        artist: Artist dictionary as returned by Lidarr.

    Returns:
        The upper-cased two-letter code, or ``None`` when the field is
        missing, empty, or not a two-character string.
    """
    country = artist.get("country")
    if isinstance(country, str) and len(country) == 2:
        return country.upper()

    # Anything else might be a country name, which would need a mapping.
    # Note: MusicBrainz lookup must be done asynchronously in sync_lidarr()
    return None
@@ -176,6 +259,9 @@ async def upsert_media_item(source_kind: str, source_item_id: int, title: str,
"INSERT INTO moviemap.media_country (media_item_id, country_code) VALUES (%s, %s)",
(media_item_id, country_code)
)
else:
# Log when country extraction fails for debugging
logger.debug(f"Could not extract country for {source_kind} item {source_item_id}: {title}")
await conn.commit()
return media_item_id
@@ -188,6 +274,17 @@ async def sync_radarr():
for movie in movies:
try:
# Try to get country from TMDB if tmdbId is available and no country in Radarr data
country_code = extract_country_from_radarr(movie)
if not country_code and "tmdbId" in movie and movie["tmdbId"]:
country_code = await get_tmdb_movie_country(movie["tmdbId"])
# Store TMDB country in the movie data for upsert_media_item to use
if country_code:
if "productionCountries" not in movie:
movie["productionCountries"] = []
movie["productionCountries"].append({"iso_3166_1": country_code})
# Upsert media item (will extract country from the data we just prepared)
await upsert_media_item(
source_kind="radarr",
source_item_id=movie.get("id"),
@@ -210,6 +307,17 @@ async def sync_sonarr():
for s in series:
try:
# Try to get country from TMDB if tmdbId is available and no country in Sonarr data
country_code = extract_country_from_sonarr(s)
if not country_code and "tmdbId" in s and s["tmdbId"]:
country_code = await get_tmdb_tv_country(s["tmdbId"])
# Store TMDB country in the series data for upsert_media_item to use
if country_code:
if "seriesMetadata" not in s:
s["seriesMetadata"] = {}
s["seriesMetadata"]["originCountry"] = [country_code]
# Upsert media item (will extract country from the data we just prepared)
await upsert_media_item(
source_kind="sonarr",
source_item_id=s.get("id"),
@@ -232,6 +340,17 @@ async def sync_lidarr():
for artist in artists:
try:
# Try to get country from MusicBrainz if foreignArtistId (MBID) is available
country_code = extract_country_from_lidarr(artist)
if not country_code and "foreignArtistId" in artist and artist["foreignArtistId"]:
# foreignArtistId in Lidarr is the MusicBrainz ID
mbid = artist["foreignArtistId"]
country_code = await get_musicbrainz_artist_country(mbid)
# Store MusicBrainz country in the artist data
if country_code:
artist["country"] = country_code
# Upsert media item (will extract country from the data we just prepared)
await upsert_media_item(
source_kind="lidarr",
source_item_id=artist.get("id"),