From 335a53ee62bd75aea4c1181bad5e13dc3c1edf0b Mon Sep 17 00:00:00 2001 From: Danilo Reyes Date: Sun, 28 Dec 2025 21:47:03 -0600 Subject: [PATCH] Add country metadata extraction and assignment features - Updated `README.md` to include instructions for setting up the TMDB API key and new admin endpoints for managing country metadata. - Implemented `/admin/missing-countries` endpoint to list media items without country metadata, with filtering options for source and media type. - Added `/admin/assign-country` endpoint to manually assign a country code to a media item. - Enhanced country extraction logic in `sync.py` to utilize TMDB and MusicBrainz APIs for automatic country retrieval based on available metadata. - Updated configuration in `config.py` to include optional TMDB API key setting. - Improved error handling and logging for country extraction failures. - Ensured that country data is stored and utilized during media item synchronization across Radarr, Sonarr, and Lidarr. --- README.md | 73 +++++++++++++++-- backend/app/api/admin.py | 134 ++++++++++++++++++++++++++++++- backend/app/core/config.py | 3 + backend/app/services/sync.py | 149 +++++++++++++++++++++++++++++++---- 4 files changed, 337 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 5a3ab14..f5cc0b1 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,9 @@ RADARR_API_KEY=your_radarr_api_key LIDARR_API_KEY=your_lidarr_api_key PORT=8080 HOST=0.0.0.0 # Use 127.0.0.1 for localhost only + +# Optional: External APIs for country data +TMDB_API_KEY=your_tmdb_api_key # Get from https://www.themoviedb.org/settings/api ``` 3. 
(Optional) Set up frontend environment variables (create `.env.local` in `frontend/`): @@ -220,6 +223,8 @@ The service will be available at `http://0.0.0.0:8080` (or your server's IP addr ### Admin - `POST /admin/sync` - Trigger sync from all *arr instances (requires admin token if configured) +- `GET /admin/missing-countries?source_kind=sonarr&media_type=show&limit=100` - List items without country metadata +- `POST /admin/assign-country?item_id=ITEM_ID&country_code=US` - Manually assign country to an item ## Database Schema @@ -233,13 +238,71 @@ The application creates a `moviemap` schema in the `jawz` database with the foll ## Country Extraction -The sync process extracts country information from *arr metadata: +The sync process extracts country information using multiple methods: -- **Radarr**: Uses `productionCountries` from movie metadata -- **Sonarr**: Uses `originCountry` from series metadata (if available) -- **Lidarr**: Uses `country` field from artist metadata +### Automatic Extraction -If country information is not available, the item is stored without a country association (excluded from map visualization). +- **Radarr (Movies)**: + - First tries `productionCountries` from Radarr metadata + - Falls back to TMDB API (requires `TMDB_API_KEY` env var) using `tmdbId` + +- **Sonarr (TV Shows)**: + - First tries `seriesMetadata.originCountry` from Sonarr metadata + - Falls back to TMDB API (requires `TMDB_API_KEY` env var) using `tmdbId` + +- **Lidarr (Music)**: + - First tries `country` field from Lidarr metadata + - Falls back to MusicBrainz API (no API key required) using `foreignArtistId` (MBID) + +### External API Setup + +**TMDB API (for Movies & TV Shows):** +1. Get a free API key from https://www.themoviedb.org/settings/api +2. Set environment variable: `TMDB_API_KEY=your_api_key_here` +3. 
Re-run sync to fetch country data + +**MusicBrainz API (for Music):** +- No API key required (uses public API) +- Automatically used if `foreignArtistId` (MusicBrainz ID) is available in Lidarr + +### Manual Assignment + +If automatic extraction fails, you can manually assign countries: + +1. **View missing countries:** + ```bash + curl "http://127.0.0.1:8080/admin/missing-countries?source_kind=sonarr&limit=50" + ``` + +2. **Assign country manually:** + ```bash + curl -X POST "http://127.0.0.1:8080/admin/assign-country?item_id=ITEM_ID&country_code=US" + ``` + +Items without country information are stored but excluded from map visualization until a country is assigned. + +## Implementation Status + +### ✅ Completed Features + +- ✅ Project scaffolding (FastAPI backend + React frontend) +- ✅ Database schema and migrations (PostgreSQL) +- ✅ *arr sync integration (Radarr, Sonarr, Lidarr) +- ✅ Collection Map UI (View 1) with filters +- ✅ Watched Map UI (View 2) with manual tracking +- ✅ NixOS module and systemd deployment +- ✅ TMDB API integration for movies and TV shows +- ✅ MusicBrainz API integration for music +- ✅ Manual country assignment feature +- ✅ Missing metadata admin view + +### 🔄 Future Enhancements (Optional) + +- Batch country assignment UI +- Country extraction from file paths/metadata +- Export/import functionality +- Statistics and analytics +- Multi-user support ## License diff --git a/backend/app/api/admin.py b/backend/app/api/admin.py index 93ec2b0..5596364 100644 --- a/backend/app/api/admin.py +++ b/backend/app/api/admin.py @@ -1,7 +1,8 @@ """Admin API endpoints""" -from fastapi import APIRouter, HTTPException, Header -from typing import Optional +from fastapi import APIRouter, HTTPException, Header, Query +from typing import Optional, List from app.core.config import settings +from app.core.database import init_db, pool as db_pool from app.services.sync import sync_all_arrs router = APIRouter() @@ -32,3 +33,132 @@ async def trigger_sync(authorization: 
Optional[str] = Header(None)): except Exception as e: raise HTTPException(status_code=500, detail=f"Sync failed: {str(e)}") + +@router.get("/missing-countries") +async def get_missing_countries( + authorization: Optional[str] = Header(None), + source_kind: Optional[str] = Query(None, description="Filter by source: radarr, sonarr, lidarr"), + media_type: Optional[str] = Query(None, description="Filter by media type: movie, show, music"), + limit: int = Query(100, ge=1, le=1000) +): + """ + Get list of media items without country metadata. + Requires admin token if MOVIEMAP_ADMIN_TOKEN is set. + """ + await verify_admin_token(authorization) + await init_db() + if db_pool is None: + raise HTTPException(status_code=503, detail="Database not available") + + async with db_pool.connection() as conn: + async with conn.cursor() as cur: + # Get total count + count_query = """ + SELECT COUNT(DISTINCT mi.id) + FROM moviemap.media_item mi + LEFT JOIN moviemap.media_country mc ON mi.id = mc.media_item_id + WHERE mc.media_item_id IS NULL + """ + count_params = [] + + if source_kind: + count_query += " AND mi.source_kind = %s" + count_params.append(source_kind) + if media_type: + count_query += " AND mi.media_type = %s" + count_params.append(media_type) + + await cur.execute(count_query, count_params if count_params else None) + total_count = (await cur.fetchone())[0] + + # Get items + query = """ + SELECT + mi.id, + mi.source_kind, + mi.source_item_id, + mi.title, + mi.year, + mi.media_type + FROM moviemap.media_item mi + LEFT JOIN moviemap.media_country mc ON mi.id = mc.media_item_id + WHERE mc.media_item_id IS NULL + """ + params = [] + + if source_kind: + query += " AND mi.source_kind = %s" + params.append(source_kind) + if media_type: + query += " AND mi.media_type = %s" + params.append(media_type) + + query += " ORDER BY mi.title LIMIT %s" + params.append(limit) + + await cur.execute(query, params) + rows = await cur.fetchall() + + items = [] + for row in rows: + 
items.append({ + "id": str(row[0]), + "source_kind": row[1], + "source_item_id": row[2], + "title": row[3], + "year": row[4], + "media_type": row[5], + }) + + return { + "total": total_count, + "returned": len(items), + "items": items + } + + +@router.post("/assign-country") +async def assign_country_manually( + item_id: str, + country_code: str, + authorization: Optional[str] = Header(None) +): + """ + Manually assign a country code to a media item. + Requires admin token if MOVIEMAP_ADMIN_TOKEN is set. + """ + await verify_admin_token(authorization) + await init_db() + if db_pool is None: + raise HTTPException(status_code=503, detail="Database not available") + + # Validate country code (should be 2 letters) + if len(country_code) != 2 or not country_code.isalpha(): + raise HTTPException(status_code=400, detail="Country code must be 2 letters (ISO 3166-1 alpha-2)") + + country_code = country_code.upper() + + async with db_pool.connection() as conn: + async with conn.cursor() as cur: + # Check if item exists + await cur.execute("SELECT id FROM moviemap.media_item WHERE id = %s", (item_id,)) + if not await cur.fetchone(): + raise HTTPException(status_code=404, detail="Media item not found") + + # Insert or update country association + await cur.execute( + """ + INSERT INTO moviemap.media_country (media_item_id, country_code) + VALUES (%s, %s) + ON CONFLICT (media_item_id, country_code) DO NOTHING + """, + (item_id, country_code) + ) + await conn.commit() + + return { + "status": "success", + "item_id": item_id, + "country_code": country_code + } + diff --git a/backend/app/core/config.py b/backend/app/core/config.py index 43a0f1f..1229576 100644 --- a/backend/app/core/config.py +++ b/backend/app/core/config.py @@ -29,6 +29,9 @@ class Settings(BaseSettings): # Admin admin_token: Optional[str] = os.getenv("MOVIEMAP_ADMIN_TOKEN") + # External APIs (optional) + tmdb_api_key: str = os.getenv("TMDB_API_KEY", "") + @property def database_url(self) -> str: """Build 
PostgreSQL connection string using Unix socket""" diff --git a/backend/app/services/sync.py b/backend/app/services/sync.py index 4a05081..c3bb70f 100644 --- a/backend/app/services/sync.py +++ b/backend/app/services/sync.py @@ -4,6 +4,12 @@ import logging from typing import Dict, List, Optional from app.core.config import settings import json +import os + +logger = logging.getLogger(__name__) + +# TMDB API configuration +TMDB_BASE_URL = "https://api.themoviedb.org/3" logger = logging.getLogger(__name__) @@ -68,8 +74,35 @@ async def fetch_lidarr_artists() -> List[Dict]: return [] +async def get_tmdb_movie_country(tmdb_id: int) -> Optional[str]: + """Get country code from TMDB API for a movie""" + if not settings.tmdb_api_key: + return None + + try: + async with httpx.AsyncClient() as client: + response = await client.get( + f"{TMDB_BASE_URL}/movie/{tmdb_id}", + params={"api_key": settings.tmdb_api_key}, + timeout=5.0 + ) + if response.status_code == 200: + data = response.json() + # Get production_countries (list of objects with iso_3166_1) + if "production_countries" in data and data["production_countries"]: + countries = data["production_countries"] + if isinstance(countries, list) and len(countries) > 0: + country = countries[0] + if isinstance(country, dict) and "iso_3166_1" in country: + return country["iso_3166_1"].upper() + except Exception as e: + logger.debug(f"Failed to fetch TMDB data for movie {tmdb_id}: {e}") + + return None + + def extract_country_from_radarr(movie: Dict) -> Optional[str]: - """Extract country code from Radarr movie metadata""" + """Extract country code from Radarr movie metadata (synchronous check only)""" # Try productionCountries first if "productionCountries" in movie and movie["productionCountries"]: countries = movie["productionCountries"] @@ -89,40 +122,90 @@ def extract_country_from_radarr(movie: Dict) -> Optional[str]: if isinstance(country, dict) and "iso_3166_1" in country: return country["iso_3166_1"].upper() + # Note: TMDB 
lookup must be done asynchronously in sync_radarr() + return None + + +async def get_tmdb_tv_country(tmdb_id: int) -> Optional[str]: + """Get country code from TMDB API for a TV series""" + if not settings.tmdb_api_key: + return None + + try: + async with httpx.AsyncClient() as client: + response = await client.get( + f"{TMDB_BASE_URL}/tv/{tmdb_id}", + params={"api_key": settings.tmdb_api_key}, + timeout=5.0 + ) + if response.status_code == 200: + data = response.json() + # Get origin_country (list of ISO 3166-1 codes) + if "origin_country" in data and data["origin_country"]: + countries = data["origin_country"] + if isinstance(countries, list) and len(countries) > 0: + return countries[0].upper() + except Exception as e: + logger.debug(f"Failed to fetch TMDB data for TV {tmdb_id}: {e}") + return None def extract_country_from_sonarr(series: Dict) -> Optional[str]: - """Extract country code from Sonarr series metadata""" - # Sonarr doesn't always have country info directly - # Check network origin or other metadata - if "network" in series and series["network"]: - # Network name might hint at country, but not reliable - pass - - # Check if there's any country metadata - if "seriesMetadata" in series: + """Extract country code from Sonarr series metadata (synchronous check only)""" + # Try seriesMetadata first (if available) + if "seriesMetadata" in series and series["seriesMetadata"]: metadata = series["seriesMetadata"] if "originCountry" in metadata and metadata["originCountry"]: - # originCountry might be a list or string origin = metadata["originCountry"] if isinstance(origin, list) and len(origin) > 0: - return origin[0].upper() if len(origin[0]) == 2 else None + code = origin[0].upper() if len(origin[0]) == 2 else None + if code: + return code elif isinstance(origin, str) and len(origin) == 2: return origin.upper() + # Note: TMDB lookup must be done asynchronously in sync_sonarr() + return None + + +async def get_musicbrainz_artist_country(mbid: str) -> 
Optional[str]: + """Get country code from MusicBrainz API for an artist""" + try: + async with httpx.AsyncClient() as client: + # MusicBrainz API doesn't require an API key + response = await client.get( + f"https://musicbrainz.org/ws/2/artist/{mbid}", + params={"fmt": "json", "inc": "area-rels"}, + headers={"User-Agent": "MovieMap/1.0 (https://github.com/yourusername/movie-map)"}, + timeout=5.0 + ) + if response.status_code == 200: + data = response.json() + # Check area relations for country + if "relations" in data: + for relation in data["relations"]: + if relation.get("type") == "origin" and "area" in relation: + area = relation["area"] + if "iso-3166-1-codes" in area and area["iso-3166-1-codes"]: + codes = area["iso-3166-1-codes"] + if isinstance(codes, list) and len(codes) > 0: + return codes[0].upper() + except Exception as e: + logger.debug(f"Failed to fetch MusicBrainz data for artist {mbid}: {e}") + return None def extract_country_from_lidarr(artist: Dict) -> Optional[str]: - """Extract country code from Lidarr artist metadata""" - # Lidarr has a country field + """Extract country code from Lidarr artist metadata (synchronous check only)""" + # Check top-level country field if "country" in artist and artist["country"]: country = artist["country"] if isinstance(country, str) and len(country) == 2: return country.upper() - # Might be a country name, would need mapping + # Note: MusicBrainz lookup must be done asynchronously in sync_lidarr() return None @@ -176,6 +259,9 @@ async def upsert_media_item(source_kind: str, source_item_id: int, title: str, "INSERT INTO moviemap.media_country (media_item_id, country_code) VALUES (%s, %s)", (media_item_id, country_code) ) + else: + # Log when country extraction fails for debugging + logger.debug(f"Could not extract country for {source_kind} item {source_item_id}: {title}") await conn.commit() return media_item_id @@ -188,6 +274,17 @@ async def sync_radarr(): for movie in movies: try: + # Try to get country from 
TMDB if tmdbId is available and no country in Radarr data + country_code = extract_country_from_radarr(movie) + if not country_code and "tmdbId" in movie and movie["tmdbId"]: + country_code = await get_tmdb_movie_country(movie["tmdbId"]) + # Store TMDB country in the movie data for upsert_media_item to use + if country_code: + if "productionCountries" not in movie: + movie["productionCountries"] = [] + movie["productionCountries"].append({"iso_3166_1": country_code}) + + # Upsert media item (will extract country from the data we just prepared) await upsert_media_item( source_kind="radarr", source_item_id=movie.get("id"), @@ -210,6 +307,17 @@ async def sync_sonarr(): for s in series: try: + # Try to get country from TMDB if tmdbId is available and no country in Sonarr data + country_code = extract_country_from_sonarr(s) + if not country_code and "tmdbId" in s and s["tmdbId"]: + country_code = await get_tmdb_tv_country(s["tmdbId"]) + # Store TMDB country in the series data for upsert_media_item to use + if country_code: + if "seriesMetadata" not in s: + s["seriesMetadata"] = {} + s["seriesMetadata"]["originCountry"] = [country_code] + + # Upsert media item (will extract country from the data we just prepared) await upsert_media_item( source_kind="sonarr", source_item_id=s.get("id"), @@ -232,6 +340,17 @@ async def sync_lidarr(): for artist in artists: try: + # Try to get country from MusicBrainz if foreignArtistId (MBID) is available + country_code = extract_country_from_lidarr(artist) + if not country_code and "foreignArtistId" in artist and artist["foreignArtistId"]: + # foreignArtistId in Lidarr is the MusicBrainz ID + mbid = artist["foreignArtistId"] + country_code = await get_musicbrainz_artist_country(mbid) + # Store MusicBrainz country in the artist data + if country_code: + artist["country"] = country_code + + # Upsert media item (will extract country from the data we just prepared) await upsert_media_item( source_kind="lidarr", source_item_id=artist.get("id"),