Add country metadata extraction and assignment features

- Updated `README.md` to include instructions for setting up the TMDB API key and new admin endpoints for managing country metadata.
- Implemented `/admin/missing-countries` endpoint to list media items without country metadata, with filtering options for source and media type.
- Added `/admin/assign-country` endpoint to manually assign a country code to a media item.
- Enhanced country extraction logic in `sync.py` to utilize TMDB and MusicBrainz APIs for automatic country retrieval based on available metadata.
- Updated configuration in `config.py` to include optional TMDB API key setting.
- Improved error handling and logging for country extraction failures.
- Ensured that country data is stored and utilized during media item synchronization across Radarr, Sonarr, and Lidarr.
This commit is contained in:
Danilo Reyes
2025-12-28 21:47:03 -06:00
parent 6cffbef8c6
commit 335a53ee62
4 changed files with 337 additions and 22 deletions

View File

@@ -1,7 +1,8 @@
"""Admin API endpoints"""
from fastapi import APIRouter, HTTPException, Header
from typing import Optional
from fastapi import APIRouter, HTTPException, Header, Query
from typing import Optional, List
from app.core.config import settings
from app.core.database import init_db, pool as db_pool
from app.services.sync import sync_all_arrs
router = APIRouter()
@@ -32,3 +33,132 @@ async def trigger_sync(authorization: Optional[str] = Header(None)):
except Exception as e:
raise HTTPException(status_code=500, detail=f"Sync failed: {str(e)}")
@router.get("/missing-countries")
async def get_missing_countries(
    authorization: Optional[str] = Header(None),
    source_kind: Optional[str] = Query(None, description="Filter by source: radarr, sonarr, lidarr"),
    media_type: Optional[str] = Query(None, description="Filter by media type: movie, show, music"),
    limit: int = Query(100, ge=1, le=1000)
):
    """
    Get list of media items without country metadata.

    Requires admin token if MOVIEMAP_ADMIN_TOKEN is set.

    Args:
        authorization: Value of the ``Authorization`` header, passed to
            ``verify_admin_token``.
        source_kind: Optional exact-match filter on ``media_item.source_kind``.
        media_type: Optional exact-match filter on ``media_item.media_type``.
        limit: Maximum number of items to return (1-1000).

    Returns:
        A dict with ``total`` (number of matching items regardless of
        ``limit``), ``returned`` (number of items in this response) and
        ``items`` (one dict per media item).

    Raises:
        HTTPException: 503 when no database pool is available.
    """
    await verify_admin_token(authorization)
    await init_db()

    # `from app.core.database import pool as db_pool` copies the binding at
    # import time; if init_db() rebinds the module-level `pool`, `db_pool`
    # stays stale. Fall back to a live read of the module attribute.
    import app.core.database as _database
    pool = db_pool if db_pool is not None else _database.pool
    if pool is None:
        raise HTTPException(status_code=503, detail="Database not available")

    # Build the optional filters once so the count query and the page query
    # can never drift apart. Only static SQL fragments are concatenated;
    # user-supplied values always travel as bound parameters.
    from_sql = """
        FROM moviemap.media_item mi
        LEFT JOIN moviemap.media_country mc ON mi.id = mc.media_item_id
        WHERE mc.media_item_id IS NULL
    """
    filter_params = []
    if source_kind:
        from_sql += " AND mi.source_kind = %s"
        filter_params.append(source_kind)
    if media_type:
        from_sql += " AND mi.media_type = %s"
        filter_params.append(media_type)

    count_query = "SELECT COUNT(DISTINCT mi.id)" + from_sql
    items_query = (
        """
        SELECT
            mi.id,
            mi.source_kind,
            mi.source_item_id,
            mi.title,
            mi.year,
            mi.media_type
        """
        + from_sql
        # mi.id is a tie-breaker so the LIMITed page is deterministic when
        # several items share a title.
        + " ORDER BY mi.title, mi.id LIMIT %s"
    )

    async with pool.connection() as conn:
        async with conn.cursor() as cur:
            # Total number of matching items, independent of `limit`.
            await cur.execute(count_query, filter_params or None)
            total_count = (await cur.fetchone())[0]

            # The requested page of items.
            await cur.execute(items_query, [*filter_params, limit])
            rows = await cur.fetchall()

    items = [
        {
            "id": str(row[0]),
            "source_kind": row[1],
            "source_item_id": row[2],
            "title": row[3],
            "year": row[4],
            "media_type": row[5],
        }
        for row in rows
    ]

    return {
        "total": total_count,
        "returned": len(items),
        "items": items
    }
@router.post("/assign-country")
async def assign_country_manually(
    item_id: str,
    country_code: str,
    authorization: Optional[str] = Header(None)
):
    """
    Manually assign a country code to a media item.

    Requires admin token if MOVIEMAP_ADMIN_TOKEN is set.

    Args:
        item_id: Identifier of the row in ``moviemap.media_item``.
        country_code: Two-letter country code; case-insensitive on input and
            stored upper-cased.
        authorization: Value of the ``Authorization`` header, passed to
            ``verify_admin_token``.

    Returns:
        A dict with ``status``, ``item_id`` and the normalized
        ``country_code``.

    Raises:
        HTTPException: 503 when no database pool is available, 400 when the
            country code is not two ASCII letters, 404 when the media item
            does not exist.
    """
    await verify_admin_token(authorization)
    await init_db()

    # `from app.core.database import pool as db_pool` copies the binding at
    # import time; if init_db() rebinds the module-level `pool`, `db_pool`
    # stays stale. Fall back to a live read of the module attribute.
    import app.core.database as _database
    pool = db_pool if db_pool is not None else _database.pool
    if pool is None:
        raise HTTPException(status_code=503, detail="Database not available")

    # Validate country code (should be 2 letters). str.isalpha() alone also
    # accepts non-ASCII letters (e.g. "ÄÖ"), which are not valid alpha-2
    # codes, so require ASCII as well.
    if len(country_code) != 2 or not (country_code.isascii() and country_code.isalpha()):
        raise HTTPException(status_code=400, detail="Country code must be 2 letters (ISO 3166-1 alpha-2)")

    country_code = country_code.upper()

    async with pool.connection() as conn:
        async with conn.cursor() as cur:
            # Check if item exists
            await cur.execute("SELECT id FROM moviemap.media_item WHERE id = %s", (item_id,))
            if not await cur.fetchone():
                raise HTTPException(status_code=404, detail="Media item not found")

            # Insert the association; an identical existing pair is left
            # untouched (DO NOTHING), so repeating the call is harmless.
            await cur.execute(
                """
                INSERT INTO moviemap.media_country (media_item_id, country_code)
                VALUES (%s, %s)
                ON CONFLICT (media_item_id, country_code) DO NOTHING
                """,
                (item_id, country_code)
            )
            await conn.commit()

    return {
        "status": "success",
        "item_id": item_id,
        "country_code": country_code
    }

View File

@@ -29,6 +29,9 @@ class Settings(BaseSettings):
# Admin
admin_token: Optional[str] = os.getenv("MOVIEMAP_ADMIN_TOKEN")
# External APIs (optional)
tmdb_api_key: str = os.getenv("TMDB_API_KEY", "")
@property
def database_url(self) -> str:
"""Build PostgreSQL connection string using Unix socket"""

View File

@@ -4,6 +4,12 @@ import logging
from typing import Dict, List, Optional
from app.core.config import settings
import json
import os
logger = logging.getLogger(__name__)
# TMDB API configuration
TMDB_BASE_URL = "https://api.themoviedb.org/3"
logger = logging.getLogger(__name__)
@@ -68,8 +74,35 @@ async def fetch_lidarr_artists() -> List[Dict]:
return []
async def get_tmdb_movie_country(tmdb_id: int) -> Optional[str]:
    """Get country code from TMDB API for a movie.

    Args:
        tmdb_id: TMDB identifier of the movie.

    Returns:
        The upper-cased ``iso_3166_1`` value of the first entry in the
        response's ``production_countries`` list, or ``None`` when no TMDB
        API key is configured, the request fails, the response is not
        HTTP 200, or no usable country is present.
    """
    if not settings.tmdb_api_key:
        return None

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{TMDB_BASE_URL}/movie/{tmdb_id}",
                params={"api_key": settings.tmdb_api_key},
                timeout=5.0
            )
        if response.status_code != 200:
            # Surface bad keys, rate limiting and unknown ids instead of
            # dropping them silently.
            logger.debug("TMDB returned HTTP %s for movie %s", response.status_code, tmdb_id)
            return None
        data = response.json()
    except Exception as e:
        # Best-effort lookup: any transport or decoding problem means
        # "no country", never a failed sync.
        logger.debug("Failed to fetch TMDB data for movie %s: %s", tmdb_id, e)
        return None

    # Get production_countries (list of objects with iso_3166_1)
    countries = data.get("production_countries") if isinstance(data, dict) else None
    if isinstance(countries, list) and countries:
        first = countries[0]
        if isinstance(first, dict):
            code = first.get("iso_3166_1")
            if isinstance(code, str) and code:
                return code.upper()

    return None
def extract_country_from_radarr(movie: Dict) -> Optional[str]:
"""Extract country code from Radarr movie metadata"""
"""Extract country code from Radarr movie metadata (synchronous check only)"""
# Try productionCountries first
if "productionCountries" in movie and movie["productionCountries"]:
countries = movie["productionCountries"]
@@ -89,40 +122,90 @@ def extract_country_from_radarr(movie: Dict) -> Optional[str]:
if isinstance(country, dict) and "iso_3166_1" in country:
return country["iso_3166_1"].upper()
# Note: TMDB lookup must be done asynchronously in sync_radarr()
return None
async def get_tmdb_tv_country(tmdb_id: int) -> Optional[str]:
    """Get country code from TMDB API for a TV series.

    Args:
        tmdb_id: TMDB identifier of the series.

    Returns:
        The upper-cased first entry of the response's ``origin_country``
        list, or ``None`` when no TMDB API key is configured, the request
        fails, the response is not HTTP 200, or no usable country is present.
    """
    if not settings.tmdb_api_key:
        return None

    try:
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{TMDB_BASE_URL}/tv/{tmdb_id}",
                params={"api_key": settings.tmdb_api_key},
                timeout=5.0
            )
        if response.status_code != 200:
            # Surface bad keys, rate limiting and unknown ids instead of
            # dropping them silently.
            logger.debug("TMDB returned HTTP %s for TV %s", response.status_code, tmdb_id)
            return None
        data = response.json()
    except Exception as e:
        # Best-effort lookup: any transport or decoding problem means
        # "no country", never a failed sync.
        logger.debug("Failed to fetch TMDB data for TV %s: %s", tmdb_id, e)
        return None

    # Get origin_country (list of ISO 3166-1 codes)
    countries = data.get("origin_country") if isinstance(data, dict) else None
    if isinstance(countries, list) and countries:
        code = countries[0]
        if isinstance(code, str) and code:
            return code.upper()

    return None
def extract_country_from_sonarr(series: Dict) -> Optional[str]:
    """Extract country code from Sonarr series metadata (synchronous check only).

    Looks at ``series["seriesMetadata"]["originCountry"]``, which may be a
    list of codes (the first one is used) or a single string.

    Args:
        series: Series dict as returned by Sonarr.

    Returns:
        The upper-cased two-character code, or ``None`` when the metadata is
        missing or the value is not a two-character string.
    """
    # Try seriesMetadata first (if available)
    metadata = series.get("seriesMetadata")
    if not isinstance(metadata, dict):
        return None

    # originCountry might be a list or string
    origin = metadata.get("originCountry")
    if isinstance(origin, list) and origin:
        origin = origin[0]

    # The isinstance check keeps non-string entries (None, numbers) from
    # raising TypeError in len().
    if isinstance(origin, str) and len(origin) == 2:
        return origin.upper()

    # Note: TMDB lookup must be done asynchronously in sync_sonarr()
    return None
def _first_iso_3166_1_code(area) -> Optional[str]:
    """Return the first upper-cased ``iso-3166-1-codes`` entry of an area dict, if any."""
    if not isinstance(area, dict):
        return None
    codes = area.get("iso-3166-1-codes")
    if isinstance(codes, list) and codes and isinstance(codes[0], str) and codes[0]:
        return codes[0].upper()
    return None


async def get_musicbrainz_artist_country(mbid: str) -> Optional[str]:
    """Get country code from MusicBrainz API for an artist.

    The response is checked in order of reliability:

    1. the top-level ``country`` field,
    2. the ``iso-3166-1-codes`` of the artist's ``area``,
    3. the ``iso-3166-1-codes`` of any ``origin`` area relation.

    Args:
        mbid: MusicBrainz artist ID.

    Returns:
        An upper-cased country code, or ``None`` when the request fails, the
        response is not HTTP 200, or no country is present.
    """
    try:
        async with httpx.AsyncClient() as client:
            # MusicBrainz API doesn't require an API key
            # NOTE(review): the User-Agent contact URL still contains the
            # "yourusername" placeholder; MusicBrainz asks for a meaningful
            # one - confirm and replace.
            response = await client.get(
                f"https://musicbrainz.org/ws/2/artist/{mbid}",
                params={"fmt": "json", "inc": "area-rels"},
                headers={"User-Agent": "MovieMap/1.0 (https://github.com/yourusername/movie-map)"},
                timeout=5.0
            )
        if response.status_code != 200:
            # NOTE(review): MusicBrainz rate-limits clients; a caller looping
            # over many artists may land here - confirm whether the sync
            # needs throttling.
            logger.debug("MusicBrainz returned HTTP %s for artist %s", response.status_code, mbid)
            return None
        data = response.json()
    except Exception as e:
        # Best-effort lookup: any transport or decoding problem means
        # "no country", never a failed sync.
        logger.debug("Failed to fetch MusicBrainz data for artist %s: %s", mbid, e)
        return None

    if not isinstance(data, dict):
        return None

    # The artist entity carries its country directly.
    country = data.get("country")
    if isinstance(country, str) and len(country) == 2:
        return country.upper()

    # Otherwise use the ISO codes of the artist's main area.
    code = _first_iso_3166_1_code(data.get("area"))
    if code:
        return code

    # Finally, check area relations for country
    for relation in data.get("relations") or []:
        if isinstance(relation, dict) and relation.get("type") == "origin":
            code = _first_iso_3166_1_code(relation.get("area"))
            if code:
                return code

    return None
def extract_country_from_lidarr(artist: Dict) -> Optional[str]:
    """Return the two-character country code stored on a Lidarr artist, if any.

    Synchronous check only: just the top-level ``country`` field of the
    artist dict is inspected.

    Args:
        artist: Artist dict as returned by Lidarr.

    Returns:
        The upper-cased value of ``artist["country"]`` when it is a
        two-character string, otherwise ``None``.
    """
    candidate = artist.get("country")
    if not candidate:
        return None

    # Longer strings might be a country name, which would need mapping.
    if not isinstance(candidate, str) or len(candidate) != 2:
        # Note: MusicBrainz lookup must be done asynchronously in sync_lidarr()
        return None

    return candidate.upper()
@@ -176,6 +259,9 @@ async def upsert_media_item(source_kind: str, source_item_id: int, title: str,
"INSERT INTO moviemap.media_country (media_item_id, country_code) VALUES (%s, %s)",
(media_item_id, country_code)
)
else:
# Log when country extraction fails for debugging
logger.debug(f"Could not extract country for {source_kind} item {source_item_id}: {title}")
await conn.commit()
return media_item_id
@@ -188,6 +274,17 @@ async def sync_radarr():
for movie in movies:
try:
# Try to get country from TMDB if tmdbId is available and no country in Radarr data
country_code = extract_country_from_radarr(movie)
if not country_code and "tmdbId" in movie and movie["tmdbId"]:
country_code = await get_tmdb_movie_country(movie["tmdbId"])
# Store TMDB country in the movie data for upsert_media_item to use
if country_code:
if "productionCountries" not in movie:
movie["productionCountries"] = []
movie["productionCountries"].append({"iso_3166_1": country_code})
# Upsert media item (will extract country from the data we just prepared)
await upsert_media_item(
source_kind="radarr",
source_item_id=movie.get("id"),
@@ -210,6 +307,17 @@ async def sync_sonarr():
for s in series:
try:
# Try to get country from TMDB if tmdbId is available and no country in Sonarr data
country_code = extract_country_from_sonarr(s)
if not country_code and "tmdbId" in s and s["tmdbId"]:
country_code = await get_tmdb_tv_country(s["tmdbId"])
# Store TMDB country in the series data for upsert_media_item to use
if country_code:
if "seriesMetadata" not in s:
s["seriesMetadata"] = {}
s["seriesMetadata"]["originCountry"] = [country_code]
# Upsert media item (will extract country from the data we just prepared)
await upsert_media_item(
source_kind="sonarr",
source_item_id=s.get("id"),
@@ -232,6 +340,17 @@ async def sync_lidarr():
for artist in artists:
try:
# Try to get country from MusicBrainz if foreignArtistId (MBID) is available
country_code = extract_country_from_lidarr(artist)
if not country_code and "foreignArtistId" in artist and artist["foreignArtistId"]:
# foreignArtistId in Lidarr is the MusicBrainz ID
mbid = artist["foreignArtistId"]
country_code = await get_musicbrainz_artist_country(mbid)
# Store MusicBrainz country in the artist data
if country_code:
artist["country"] = country_code
# Upsert media item (will extract country from the data we just prepared)
await upsert_media_item(
source_kind="lidarr",
source_item_id=artist.get("id"),