This commit is contained in:
Danilo Reyes
2025-12-03 22:43:54 -06:00
commit 94f8918e78
6 changed files with 764 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
__pycache__/

27
flake.lock generated Normal file
View File

@@ -0,0 +1,27 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1751274312,
"narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-24.11",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

34
flake.nix Normal file
View File

@@ -0,0 +1,34 @@
# Nix flake for the RomM Duplicate Finder: exposes a development shell and a
# runnable `romm-dupe-finder` package built from main.py.
{
description = "RomM Duplicate Finder";
# nixpkgs is tracked on the nixos-24.11 branch.
inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11";
outputs =
{ self, nixpkgs }:
let
# Outputs below are only defined for this single system.
system = "x86_64-linux";
pkgs = import nixpkgs { inherit system; };
# Python interpreter bundled with the `requests` library.
pythonEnv = pkgs.python3.withPackages (
ps: with ps; [
requests
]
);
in
{
# `nix develop`: shell with pythonEnv available; the hook prints a short banner.
devShells.${system}.default = pkgs.mkShell {
buildInputs = [
pythonEnv
];
shellHook = ''
echo "RomM Duplicate Finder development environment"
echo "Python: $(python --version)"
echo "Available packages: requests"
'';
};
# Default package: the text of main.py is inlined into a script whose shebang
# points at pythonEnv's interpreter.
packages.${system}.default = pkgs.writeScriptBin "romm-dupe-finder" ''
#!${pythonEnv}/bin/python
${builtins.readFile ./main.py}
'';
};
}

382
main.py Normal file
View File

@@ -0,0 +1,382 @@
#!/usr/bin/env python3
import os
import re
from collections import defaultdict
from itertools import chain
from typing import Dict, List, Set, Optional, Tuple, Iterator

import requests
# Connection settings shared by every API call (and imported by
# rename_roms.py). Each value can be overridden through the environment so the
# script can target another RomM instance without editing the source.
# NOTE(review): the fallback password below is committed in plain text; it
# should be rotated and supplied only through ROMM_PASSWORD.
session = requests.Session()
base = os.environ.get("ROMM_URL", "http://localhost:4444")
session.auth = (
    os.environ.get("ROMM_USER", "jawz"),
    os.environ.get(
        "ROMM_PASSWORD", "overall-tuition-utensil-lecturer-fantastic-deferral"
    ),
)
def fetch_roms_page(
    session: requests.Session, base: str, offset: int, limit: int = 500
) -> Dict:
    """Fetch a single page of ROMs.

    Args:
        session: HTTP session used to perform the request.
        base: Base URL of the server, without a trailing slash.
        offset: Index of the first ROM to return.
        limit: Maximum number of ROMs requested for the page (defaults to the
            previously hard-coded 500).

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    r = session.get(f"{base}/api/roms", params={"limit": limit, "offset": offset})
    r.raise_for_status()
    return r.json()
def fetch_all_roms(session: requests.Session, base: str) -> List[Dict]:
    """Fetch every ROM by walking the paginated listing.

    Args:
        session: HTTP session used to perform the requests.
        base: Base URL of the server, without a trailing slash.

    Returns:
        A flat list with the items of every page, in server order.
    """

    def fetch_pages() -> Iterator[List[Dict]]:
        offset = 0
        total = None
        while True:
            page = fetch_roms_page(session, base, offset)
            items = page.get("items", [])
            if not items:
                # An empty page always means there is nothing left to read.
                break
            yield items
            if total is None:
                # Left as None when the response carries no "total"; the loop
                # then simply runs until an empty page is returned.
                total = page.get("total")
            # Advance by what was actually received, so a server that returns
            # fewer rows than requested does not cause rows to be skipped.
            offset += len(items)
            if total is not None and offset >= total:
                break

    return list(chain.from_iterable(fetch_pages()))
# Script entry, part one: download the whole ROM list when run directly.
# The `roms` name bound here is consumed by the second `__main__` block at the
# bottom of the file; nothing is fetched when this module is merely imported.
if __name__ == "__main__":
    print("Fetching all ROMs...")
    roms = fetch_all_roms(session, base)
    print(f"Fetched {len(roms)} ROMs")
def normalize_name(name: Optional[str]) -> str:
    """Normalize a ROM name for comparison.

    Bracketed ``[...]`` and parenthesised ``(...)`` tags are removed, runs of
    whitespace are collapsed to a single space, and the result is lower-cased.

    Args:
        name: Raw ROM name; may be None or empty.

    Returns:
        The normalized name, or "" when no name was given.
    """
    if not name:
        return ""
    without_tags = re.sub(r"\(.*?\)", "", re.sub(r"\[.*?\]", "", name))
    # Removing a tag from the middle of a name leaves a double space behind
    # ("Zelda (Europe) II" -> "Zelda  II"); collapse it so such names compare
    # equal to "Zelda II".
    return " ".join(without_tags.split()).lower()
def get_metadata_id(rom: Dict) -> Optional[Tuple[str, int]]:
    """Return ``(source, id)`` for the first metadata provider the ROM has.

    Providers are tried in the order igdb, moby, ss, launchbox; a missing or
    falsy id is skipped. None is returned when no provider id is set.
    """
    key_to_source = {
        "igdb_id": "igdb",
        "moby_id": "moby",
        "ss_id": "ss",
        "launchbox_id": "launchbox",
    }
    for field, provider in key_to_source.items():
        identifier = rom.get(field)
        if identifier:
            return (provider, identifier)
    return None
def get_hash(rom: Dict) -> Optional[str]:
    """Return the strongest available hash as ``"<type>:<value>"``.

    sha1 is preferred over md5, which is preferred over crc. None is returned
    when the ROM carries none of them.
    """
    key_to_label = {
        "sha1_hash": "sha1",
        "md5_hash": "md5",
        "crc_hash": "crc",
    }
    for field, label in key_to_label.items():
        digest = rom.get(field)
        if digest:
            return f"{label}:{digest}"
    return None
def has_region(rom: Dict, region: str) -> bool:
    """Tell whether any of the ROM's regions contains *region*.

    The comparison is a case-insensitive substring match against each entry
    of the ROM's "regions" list.
    """
    rom_regions = rom.get("regions", [])
    if rom_regions:
        needle = region.lower()
        for candidate in rom_regions:
            if needle in candidate.lower():
                return True
    return False
def is_eur(rom: Dict) -> bool:
    """Tell whether the ROM is tagged with a European region (eur/europe/pal)."""
    return any(has_region(rom, tag) for tag in ("eur", "europe", "pal"))
def is_usa(rom: Dict) -> bool:
    """Tell whether the ROM is tagged with a USA region (usa/north america/ntsc-u)."""
    return any(has_region(rom, tag) for tag in ("usa", "north america", "ntsc-u"))
def is_japan(rom: Dict) -> bool:
    """Tell whether the ROM is tagged with a Japanese region (jpn/japan/ntsc-j)."""
    return any(has_region(rom, tag) for tag in ("jpn", "japan", "ntsc-j"))
def get_language_count(rom: Dict) -> int:
    """Return how many entries the ROM's "languages" list holds (0 if absent/empty)."""
    spoken = rom.get("languages", [])
    if not spoken:
        return 0
    return len(spoken)
def get_metadata_score(rom: Dict) -> int:
    """Score how complete the ROM's metadata is.

    Each field that is present and truthy contributes a fixed weight; the
    weights are summed.
    """
    weights = (
        ("igdb_id", 10),
        ("moby_id", 5),
        ("ss_id", 5),
        ("name", 3),
        ("summary", 2),
        ("alternative_names", 1),
    )
    points = 0
    for field, weight in weights:
        if rom.get(field):
            points += weight
    return points
def get_region_score(rom: Dict) -> int:
    """Return the region preference score: EUR 30, USA 20, Japan 10, otherwise 0."""
    ranking = ((is_eur, 30), (is_usa, 20), (is_japan, 10))
    # The first matching predicate wins, so a multi-region ROM takes the
    # highest-ranked region it belongs to.
    for belongs_to, points in ranking:
        if belongs_to(rom):
            return points
    return 0
def rom_preference_score(rom: Dict) -> Tuple[int, int, int, int]:
    """Compute how desirable it is to keep *rom*; higher is better.

    Returns:
        ``(region_score, language_score, metadata_score, total_score)`` where
        the language score is five points per language and the total is the
        sum of the three components.
    """
    components = (
        get_region_score(rom),
        get_language_count(rom) * 5,
        get_metadata_score(rom),
    )
    return (*components, sum(components))
def group_by_hash(roms: List[Dict]) -> Dict[str, List[Dict]]:
    """Group ROMs that share the same hash (exact duplicates).

    ROMs without any hash are ignored. Only hashes shared by more than one ROM
    are returned, keyed as ``exact_hash_<hash>``.
    """
    buckets = defaultdict(list)
    for rom in roms:
        digest = get_hash(rom)
        if digest is not None:
            buckets[digest].append(rom)

    duplicates = {}
    for digest, members in buckets.items():
        if len(members) > 1:
            duplicates[f"exact_hash_{digest}"] = members
    return duplicates
def group_by_metadata(
    roms: List[Dict], existing_groups: Dict[str, List[Dict]]
) -> Dict[str, List[Dict]]:
    """Group ROMs that share a metadata id, skipping ROMs already grouped.

    Args:
        roms: Every ROM to consider.
        existing_groups: Groups found earlier; their members are excluded.

    Returns:
        Groups keyed as ``metadata_<source>_<id>``. A group is kept only when
        it has more than one ROM and its members carry either no hash at all
        or more than one distinct hash.
    """
    already_grouped = set()
    for members in existing_groups.values():
        already_grouped.update(member["id"] for member in members)

    buckets = defaultdict(list)
    for rom in roms:
        meta_id = get_metadata_id(rom)
        if meta_id is None:
            continue
        if rom["id"] in already_grouped:
            continue
        buckets[meta_id].append(rom)

    result = {}
    for (source, identifier), members in buckets.items():
        if len(members) <= 1:
            continue
        distinct_hashes = {h for h in map(get_hash, members) if h}
        # NOTE(review): exactly one distinct hash also covers "one hashed ROM
        # plus hash-less ROMs"; such groups are dropped here — confirm that is
        # intended.
        if len(distinct_hashes) == 1:
            continue
        result[f"metadata_{source}_{identifier}"] = members
    return result
def group_by_name(
    roms: List[Dict], existing_groups: Dict[str, List[Dict]]
) -> Dict[str, List[Dict]]:
    """Group ROMs that share a normalized name, skipping ROMs already grouped.

    ROMs whose normalized name is empty are ignored. Groups with more than one
    ROM are returned, keyed as ``name_`` plus the first 50 characters of the
    normalized name.
    """
    already_grouped = set()
    for members in existing_groups.values():
        already_grouped.update(member["id"] for member in members)

    buckets = defaultdict(list)
    for rom in roms:
        key = normalize_name(rom.get("name"))
        if not key:
            continue
        if rom["id"] in already_grouped:
            continue
        buckets[key].append(rom)

    result = {}
    for key, members in buckets.items():
        if len(members) > 1:
            result[f"name_{key[:50]}"] = members
    return result
def find_duplicates(roms: List[Dict]) -> Dict[str, List[Dict]]:
    """Group ROMs by hash, then metadata id, then normalized name.

    Each later stage only sees ROMs that no earlier stage has grouped. The
    returned mapping lists hash groups first, then metadata, then name groups.
    """
    groups = dict(group_by_hash(roms))
    # The argument is evaluated before update() runs, so each stage receives
    # exactly the groups accumulated by the stages before it.
    groups.update(group_by_metadata(roms, groups))
    groups.update(group_by_name(roms, groups))
    return groups
def score_roms(group: List[Dict]) -> List[Tuple[Dict, Tuple[int, int, int, int]]]:
    """Pair every ROM of *group* with its preference score, keeping the order."""
    scored = []
    for rom in group:
        scored.append((rom, rom_preference_score(rom)))
    return scored
def select_best_rom(
    scored_roms: List[Tuple[Dict, Tuple[int, int, int, int]]],
) -> Tuple[Dict, List[Dict]]:
    """Pick the ROM with the highest total score.

    Returns:
        ``(best_rom, others)`` where *others* holds the remaining ROMs ordered
        by descending total score. Ties keep their input order.
    """

    def total_of(entry: Tuple[Dict, Tuple[int, int, int, int]]) -> int:
        # The total is the fourth element of the score tuple.
        return entry[1][3]

    ranked = sorted(scored_roms, key=total_of, reverse=True)
    winner = ranked[0][0]
    losers = [entry[0] for entry in ranked[1:]]
    return winner, losers
def print_rom_info(rom: Dict, score: Tuple[int, int, int, int], prefix: str = " "):
    """Print name, id, regions, languages, score and file name of *rom*.

    Every printed line starts with *prefix*.
    """
    report = [
        f"{prefix}Name: {rom.get('name', 'Unknown')} (ID: {rom['id']})",
        f"{prefix} Region: {rom.get('regions', [])}",
        f"{prefix} Languages: {rom.get('languages', [])}",
        f"{prefix} Score: {score}",
        f"{prefix} File: {rom.get('fs_name', 'N/A')}",
    ]
    for line in report:
        print(line)
def process_group(
    group_name: str, group: List[Dict], processed_ids: Set[int]
) -> List[Dict]:
    """Report one duplicate group and return the ROMs to delete from it.

    Args:
        group_name: Label printed as the group header.
        group: ROMs considered duplicates of each other.
        processed_ids: Ids already handled by earlier groups; updated in place
            with every ROM of this group.

    Returns:
        The ROMs to delete, or an empty list when the group has at most one
        ROM or shares a ROM with an already processed group.
    """
    if len(group) <= 1:
        return []
    if processed_ids.intersection(r["id"] for r in group):
        return []
    best_rom, delete_roms = select_best_rom(score_roms(group))
    print(f"Group: {group_name}")
    # Show the kept ROM's own score (the first entry of the scored list is
    # merely the first ROM of the group, not necessarily the best one).
    print_rom_info(best_rom, rom_preference_score(best_rom), " Keeping: ")
    # Record the kept ROM as well, so a later overlapping group is skipped
    # instead of deleting a ROM that was just reported as kept.
    processed_ids.add(best_rom["id"])
    for rom in delete_roms:
        print_rom_info(rom, rom_preference_score(rom), " Delete: ")
        processed_ids.add(rom["id"])
    print()
    return delete_roms
def get_kept_roms(groups: Dict[str, List[Dict]]) -> List[Dict]:
    """Return the best-scoring ROM of every group that has more than one ROM."""
    return [
        select_best_rom(score_roms(members))[0]
        for members in groups.values()
        if len(members) > 1
    ]
def recommend_roms_to_delete(roms: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
    """Analyse *roms* for duplicates and recommend which ones to delete.

    Prints the number of groups found and a report for each processed group.

    Returns:
        ``(roms_to_delete, roms_kept)``.
    """
    groups = find_duplicates(roms)
    seen_ids = set()
    print(f"\nFound {len(groups)} duplicate groups\n")

    doomed = []
    for label, members in groups.items():
        doomed.extend(process_group(label, members, seen_ids))

    survivors = get_kept_roms(groups)
    return doomed, survivors
def delete_roms(session: requests.Session, base: str, rom_ids: List[int]) -> Dict:
    """Ask the server to delete the given ROMs.

    The same id list is sent both as "roms" and as "delete_from_fs".
    NOTE(review): presumably the latter also removes the files from disk —
    confirm against the server's API description.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    body = {"roms": rom_ids, "delete_from_fs": rom_ids}
    response = session.post(f"{base}/api/roms/delete", json=body)
    response.raise_for_status()
    return response.json()
def format_rom_list(roms: List[Dict]) -> str:
    """Format a list of ROMs for display, one line per ROM, sorted by name.

    Args:
        roms: ROM dictionaries; each must contain "id".

    Returns:
        The formatted lines joined by newlines, or " (none)" for an empty
        list.
    """
    if not roms:
        return " (none)"
    # A ROM whose "name" is present but None must not break the sort: None
    # cannot be compared with str, so it is ordered as an empty name.
    ordered = sorted(roms, key=lambda r: r.get("name") or "")
    return "\n".join(
        f" - {rom.get('name', 'Unknown')} (ID: {rom['id']}) - {rom.get('fs_name', 'N/A')}"
        for rom in ordered
    )
def format_summary(
    roms: List[Dict],
    roms_to_delete: List[Dict],
    roms_kept: List[Dict],
) -> str:
    """Build the summary banner shown before any deletion takes place.

    The banner reports how many ROMs were analysed, how many are recommended
    for deletion, how many are kept, and the ids to delete.
    """
    rule = "=" * 60
    ids_to_remove = [rom["id"] for rom in roms_to_delete]
    # The empty first and last entries reproduce the leading and trailing
    # newline of the banner.
    lines = [
        "",
        rule,
        "SUMMARY",
        rule,
        f"Total ROMs analyzed: {len(roms)}",
        f"ROMs recommended for deletion: {len(roms_to_delete)}",
        f"ROMs kept (originals): {len(roms_kept)}",
        rule,
        f"ROM IDs to delete: {ids_to_remove}",
        rule,
        "",
    ]
    return "\n".join(lines)
def format_deletion_results(deleted: List[Dict], kept: List[Dict]) -> str:
    """Build the report shown after deletion: deleted ROMs, then kept ROMs."""
    rule = "=" * 60
    # The empty first and last entries reproduce the leading and trailing
    # newline of the report.
    lines = [
        "",
        rule,
        "DELETION RESULTS",
        rule,
        f"ROMs DELETED ({len(deleted)}):",
        format_rom_list(deleted),
        f"ROMs KEPT (originals) ({len(kept)}):",
        format_rom_list(kept),
        rule,
        "",
    ]
    return "\n".join(lines)
# Script entry, part two: analyse the ROMs fetched by the first `__main__`
# block, print the recommendation and then delete the redundant copies.
if __name__ == "__main__":
    roms_to_delete, roms_kept = recommend_roms_to_delete(roms)
    # format_summary() accepts exactly three arguments; passing `base` as a
    # fourth one raised TypeError before anything was printed.
    print(format_summary(roms, roms_to_delete, roms_kept))
    if roms_to_delete:
        delete_ids = [r["id"] for r in roms_to_delete]
        print(f"Deleting {len(roms_to_delete)} ROMs...")
        try:
            delete_roms(session, base, delete_ids)
        except Exception as e:
            print(f"✗ Error during deletion: {e}")
            print("ROMs were NOT deleted. Please check the error above.")
        else:
            # Reported outside the `try` so that a formatting problem after a
            # successful request is never announced as "NOT deleted".
            print("✓ Deletion successful!")
            print(format_deletion_results(roms_to_delete, roms_kept))
    else:
        print("No ROMs to delete.")

1
openapi.json Normal file

File diff suppressed because one or more lines are too long

319
rename_roms.py Normal file
View File

@@ -0,0 +1,319 @@
#!/usr/bin/env python3
import os
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from main import fetch_all_roms, session, base
# Host directory used when no other ROM root is supplied.
DEFAULT_HOST_ROMS_PATH = "/home/jawz/Games/roms"


def _is_under(path: str, root: str) -> bool:
    """Return True when *path* equals *root* or lies beneath it."""
    trimmed = root.rstrip("/")
    return path == trimmed or path.startswith(f"{trimmed}/")


def translate_docker_path(
    docker_path: str, host_path: str = DEFAULT_HOST_ROMS_PATH
) -> str:
    """Translate a Docker container path to the matching host path.

    Args:
        docker_path: Path as reported from inside the container.
        host_path: Host directory that holds the ROMs.

    Returns:
        *docker_path* unchanged when it is empty or already under
        *host_path*; otherwise the path re-rooted under *host_path*, either by
        replacing a known container prefix or, failing that, by dropping the
        first path component.
    """
    if not docker_path or not host_path:
        return docker_path
    if _is_under(docker_path, host_path):
        return docker_path
    # Prefixes are matched on whole path components: a bare startswith() would
    # treat "/romsets/x" as living under "/roms" and turn it into "ets/x".
    for prefix in ("/roms", "/data/roms", "/mnt/roms"):
        if _is_under(docker_path, prefix):
            relative_path = docker_path[len(prefix):].lstrip("/")
            return os.path.join(host_path, relative_path)
    path_parts = Path(docker_path).parts
    if len(path_parts) > 1:
        return str(Path(host_path) / Path(*path_parts[1:]))
    return docker_path
def is_matched(rom: Dict) -> bool:
    """Tell whether the ROM carries an id from any metadata provider."""
    for field in ("igdb_id", "moby_id", "ss_id", "launchbox_id"):
        if rom.get(field):
            return True
    return False
def should_skip_rom(rom: Dict) -> bool:
    """Tell whether the ROM must be left alone.

    A ROM is skipped when it has no name, or when its lower-cased name
    contains "(notgame)" or "zzz" anywhere.
    """
    title = rom.get("name", "")
    if not title:
        return True
    lowered = title.lower()
    for marker in ("(notgame)", "zzz", "zzz:"):
        if marker in lowered:
            return True
    return False
def already_has_proper_name(rom: Dict) -> bool:
    """Tell whether the ROM's file already carries the generated name.

    The comparison is case-insensitive and only looks at the base name of
    "fs_name". False is returned when the ROM has no file name, or when no
    target name can be generated (missing name or extension).
    """
    current = rom.get("fs_name")
    if not current:
        return False
    # generate_new_filename() applies the same name/extension/region rules and
    # yields None exactly when the name or the extension is missing.
    expected = generate_new_filename(rom)
    if expected is None:
        return False
    return os.path.basename(current).lower() == expected.lower()
def is_valid_for_rename(rom: Dict) -> bool:
    """Tell whether the ROM is matched, not skipped, and not yet properly named."""
    if not is_matched(rom):
        return False
    if should_skip_rom(rom):
        return False
    return not already_has_proper_name(rom)
def get_region_code(rom: Dict) -> Optional[str]:
    """Derive a one-letter region code ('U', 'E', 'J', ...) from the ROM.

    Regions are examined in order; the first one containing a known keyword
    decides the code. When none matches, the upper-cased first character of
    the first region is used. None is returned when there are no regions or
    the first region is an empty string.
    """
    regions = rom.get("regions", [])
    if not regions:
        return None

    keyword_codes = {
        "usa": "U",
        "north america": "U",
        "ntsc-u": "U",
        "eur": "E",
        "europe": "E",
        "pal": "E",
        "jpn": "J",
        "japan": "J",
        "ntsc-j": "J",
        "asia": "A",
        "australia": "A",
        "world": "W",
    }
    for region in regions:
        lowered = region.lower()
        for keyword, code in keyword_codes.items():
            if keyword in lowered:
                return code

    # No keyword matched anywhere: fall back to the first region's initial.
    first = regions[0]
    if len(first) >= 1:
        return first.upper()[0]
    return None
def sanitize_filename(name: str) -> str:
    """Drop characters that are invalid in file names, then trim spaces and dots."""
    forbidden = set('<>:"/\\|?*')
    kept = "".join(ch for ch in name if ch not in forbidden)
    return kept.strip(" .")
def get_extension(rom: Dict) -> Optional[str]:
    """Return the ROM's file extension without a leading dot.

    "fs_extension" is used when set; otherwise the extension is taken from
    "fs_name". None is returned when neither yields one.
    """
    declared = rom.get("fs_extension")
    if declared:
        return declared
    file_name = rom.get("fs_name")
    if not file_name:
        return None
    suffix = os.path.splitext(file_name)[1].lstrip(".")
    return suffix or None
def generate_new_filename(rom: Dict) -> Optional[str]:
    """Build the target file name for the ROM.

    The format is ``{rom_name} ({region}).{ext}``, or ``{rom_name}.{ext}``
    when no region code is available. None is returned when the ROM has no
    name or no extension.
    """
    title = rom.get("name")
    if not title:
        return None
    ext = get_extension(rom)
    if not ext:
        return None
    stem = sanitize_filename(title)
    code = get_region_code(rom)
    region_part = f" ({code})" if code else ""
    return f"{stem}{region_part}.{ext}"
def rename_rom_file(
    rom: Dict, dry_run: bool = True, host_path: str = DEFAULT_HOST_ROMS_PATH
) -> Tuple[bool, str, str]:
    """Rename the ROM's file on the host to its generated name.

    Args:
        rom: ROM dictionary; its "full_path" locates the file.
        dry_run: When True, nothing is renamed and the planned paths are
            returned as a success.
        host_path: Host directory used to translate the container path.

    Returns:
        ``(success, old_path, new_path_or_error)``.
    """
    container_path = rom.get("full_path")
    if not container_path:
        return False, "", "No full_path available"

    translated_path = translate_docker_path(container_path, host_path)
    if not os.path.exists(translated_path):
        return False, translated_path, f"File does not exist: {translated_path}"

    new_filename = generate_new_filename(rom)
    if not new_filename:
        return False, translated_path, "Could not generate new filename"

    old_path = Path(translated_path)
    new_path = old_path.parent / new_filename
    # Refuse to clobber a different file that already has the target name.
    if new_path.exists() and new_path != old_path:
        return False, str(old_path), f"Target file already exists: {new_path}"

    if not dry_run:
        try:
            os.rename(str(old_path), str(new_path))
        except Exception as e:
            return False, str(old_path), f"Error: {e}"
    return True, str(old_path), str(new_path)
def process_rom_rename(
    rom: Dict, dry_run: bool, host_path: str = DEFAULT_HOST_ROMS_PATH
) -> Tuple[Dict, bool, str, str]:
    """Rename one ROM and return the ROM together with the rename outcome."""
    outcome = rename_rom_file(rom, dry_run=dry_run, host_path=host_path)
    return (rom, *outcome)
def format_rename_result(rom: Dict, old_path: str, new_path: str) -> str:
    """Format a successful rename: ROM name and id, old base name, new base name."""
    before = os.path.basename(old_path)
    after = os.path.basename(new_path)
    lines = [
        f" {rom.get('name', 'Unknown')} (ID: {rom['id']})",
        f" {before}",
        f" -> {after}",
    ]
    return "\n".join(lines) + "\n"
def format_error_result(rom: Dict, old_path: str, error: str) -> str:
    """Format a failed rename: ROM name and id, the full old path, the error text."""
    lines = [
        f" {rom.get('name', 'Unknown')} (ID: {rom['id']})",
        f" {old_path}",
        f" Error: {error}",
    ]
    return "\n".join(lines) + "\n"
def print_results(results: List[Tuple[Dict, bool, str, str]]) -> None:
    """Print the success/failure counts followed by the details of each entry.

    Each result is ``(rom, success, old_path, new_path_or_error)``.
    """
    succeeded = []
    failed = []
    for entry in results:
        (succeeded if entry[1] else failed).append(entry)

    print(f"Successful: {len(succeeded)}")
    print(f"Failed: {len(failed)}\n")

    if succeeded:
        print("Successfully processed:")
        blocks = [
            format_rename_result(rom, old_path, new_path)
            for rom, _, old_path, new_path in succeeded
        ]
        print("".join(blocks))
    if failed:
        print("Failed:")
        blocks = [
            format_error_result(rom, old_path, error)
            for rom, _, old_path, error in failed
        ]
        print("".join(blocks))
def rename_matched_roms(
    dry_run: bool = True, host_path: str = DEFAULT_HOST_ROMS_PATH
) -> None:
    """Fetch every ROM and rename the matched ones that need it.

    Args:
        dry_run: When True, only report what would be renamed.
        host_path: Host directory used to translate container paths.
    """
    print("Fetching all ROMs...")
    roms = fetch_all_roms(session, base)
    print(f"Fetched {len(roms)} ROMs\n")

    matched_roms = [rom for rom in roms if is_matched(rom)]
    valid_roms = [rom for rom in roms if is_valid_for_rename(rom)]
    already_named_count = sum(
        1
        for rom in matched_roms
        if not should_skip_rom(rom) and already_has_proper_name(rom)
    )

    print(f"Found {len(matched_roms)} matched ROMs")
    print(f"Found {len(valid_roms)} valid ROMs for renaming")
    if already_named_count:
        print(f"Skipped {already_named_count} ROMs that already have proper names\n")
    else:
        print()

    if not valid_roms:
        print("No valid ROMs to rename.")
        return

    rule = "=" * 60
    mode = "DRY RUN" if dry_run else "RENAMING"
    print(rule)
    print(f"{mode} MODE")
    print(f"Host path: {host_path}")
    print(f"{rule}\n")

    results = [process_rom_rename(rom, dry_run, host_path) for rom in valid_roms]
    print_results(results)

    # Only remind about --execute when the dry run found something to rename.
    if dry_run and any(success for _, success, _, _ in results):
        print(rule)
        print("This was a DRY RUN. No files were actually renamed.")
        print("Run with --execute to perform the renames.")
        print(rule)
# Command-line entry point: dry run by default, --execute performs the renames.
if __name__ == "__main__":
    import argparse

    # The ROMS_HOST_PATH environment variable, when set, replaces the built-in
    # default for --host-path.
    default_root = os.getenv("ROMS_HOST_PATH", DEFAULT_HOST_ROMS_PATH)

    cli = argparse.ArgumentParser(description="Rename matched ROMs")
    cli.add_argument(
        "--execute",
        action="store_true",
        help="Actually perform the renames (default is dry run)",
    )
    cli.add_argument(
        "--host-path",
        type=str,
        default=default_root,
        help=f"Host path for ROMs (default: {DEFAULT_HOST_ROMS_PATH})",
    )
    options = cli.parse_args()
    rename_matched_roms(dry_run=not options.execute, host_path=options.host_path)