#!/usr/bin/env python3
"""
Premiere to Resolve XML Converter

Processes Adobe Premiere Pro XML exports to make them compatible with
DaVinci Resolve on Linux. Updates file paths and optionally converts MP4
files to MOV format.
"""

import os
import subprocess
import sys
import urllib.parse
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import Any, Dict, Set, Tuple

# File extensions (lowercase) that are treated as video media.
VIDEO_EXTENSIONS = ('.mp4', '.mov', '.avi', '.mxf', '.mts', '.mkv')

# Upper bound for a single ffmpeg invocation (1 hour).
FFMPEG_TIMEOUT_SECONDS = 3600

# Checked in order: the longest prefix must come first so that
# 'file://localhost/' is not mistaken for a bare 'file://'.
_FILE_URL_PREFIXES = ('file://localhost/', 'file:///', 'file://')


def _ask_yes_no(prompt: str) -> bool:
    """Prompt the user and return True only for an answer of 'y'/'Y'.

    Surrounding whitespace is ignored. End-of-input (e.g. stdin closed or
    redirected from an empty source) is treated as "no" instead of crashing.
    """
    try:
        return input(prompt).strip().lower() == 'y'
    except EOFError:
        return False


def extract_video_references(xml_file: str) -> Set[Tuple[str, str]]:
    """
    Parse XML and extract all unique video file references.

    Only <file> elements whose <name> ends with one of VIDEO_EXTENSIONS
    (case-insensitive) are included. A missing or empty <pathurl> is
    reported as an empty string.

    Returns a set of tuples: (filename, pathurl). An empty set is returned
    (and an error printed to stderr) if the file cannot be read or parsed.
    """
    try:
        root = ET.parse(xml_file).getroot()
    except (ET.ParseError, OSError) as e:
        print(f"Error parsing XML file {xml_file}: {e}", file=sys.stderr)
        return set()

    video_refs = set()

    # Find all <file> elements that contain <name> tags
    for file_elem in root.iter('file'):
        name_elem = file_elem.find('name')
        if name_elem is None or not name_elem.text:
            continue

        filename = name_elem.text.strip()
        # Only include video files (check extension)
        if not filename.lower().endswith(VIDEO_EXTENSIONS):
            continue

        pathurl_elem = file_elem.find('pathurl')
        pathurl = ""
        if pathurl_elem is not None and pathurl_elem.text:
            pathurl = pathurl_elem.text.strip()

        video_refs.add((filename, pathurl))

    return video_refs


def decode_pathurl(pathurl: str) -> str:
    """
    Decode a file:// URL path to a regular path and extract filename.

    Example: file://localhost/C%3a/Users/.../file.mp4 -> file.mp4

    Returns an empty string for empty input or if decoding fails.
    """
    if not pathurl:
        return ""

    try:
        # Remove the file://localhost/, file:/// or file:// prefix, if any
        path_part = pathurl
        for prefix in _FILE_URL_PREFIXES:
            if pathurl.startswith(prefix):
                path_part = pathurl[len(prefix):]
                break

        # URL decode, then keep only the final path component
        return os.path.basename(urllib.parse.unquote(path_part))
    except (AttributeError, TypeError, ValueError) as e:
        print(f"Error decoding pathurl {pathurl}: {e}", file=sys.stderr)
        return ""


def find_local_files(video_refs: Set[Tuple[str, str]], directory: str) -> Dict[str, Dict[str, Any]]:
    """
    Match XML references with files in directory (searches recursively).

    An exact (case-sensitive) filename match is preferred; if there is
    none, a case-insensitive match is used. When several files share a
    name, the one whose path sorts first is chosen so results are stable
    between runs.

    Returns a dict mapping original filename to:
    {
        'local_path': actual path found,
        'original_pathurl': original pathurl from XML,
        'needs_conversion': True if MP4 needs conversion
    }
    References with no matching local file are omitted.
    """
    exact_names: Dict[str, str] = {}
    lower_names: Dict[str, str] = {}

    # Index every file by exact name and by lowercased name (recursive search)
    for file_path in sorted(Path(directory).rglob('*')):
        if not file_path.is_file():
            continue
        resolved = str(file_path.resolve())
        exact_names.setdefault(file_path.name, resolved)
        lower_names.setdefault(file_path.name.lower(), resolved)

    file_mapping: Dict[str, Dict[str, Any]] = {}
    for filename, pathurl in video_refs:
        # Try exact match first, then fall back to case-insensitive
        local_path = exact_names.get(filename) or lower_names.get(filename.lower())
        if not local_path:
            continue

        file_mapping[filename] = {
            'local_path': local_path,
            'original_pathurl': pathurl,
            'needs_conversion': filename.lower().endswith('.mp4'),
        }

    return file_mapping


def convert_mp4_to_mov(input_file: str, output_file: str) -> bool:
    """
    Convert MP4 to MOV using ffmpeg with minimal quality loss.

    Strategy:
    1. Try copy codecs first (lossless if compatible)
    2. Fallback to high-quality encoding

    If output_file already exists the user is asked whether to overwrite
    it; declining returns False without running ffmpeg.

    Returns True if either attempt succeeds, False otherwise (declined
    overwrite, ffmpeg missing, timeout, or both attempts failing).
    """
    # Check if output file already exists
    if os.path.exists(output_file):
        if not _ask_yes_no(f"Output file {output_file} already exists. Overwrite? (y/n): "):
            return False

    input_name = os.path.basename(input_file)
    output_name = os.path.basename(output_file)

    # Attempt 1: copy codecs (lossless if compatible)
    cmd_copy = [
        'ffmpeg', '-i', input_file,
        '-c:v', 'copy',
        '-c:a', 'copy',
        '-y',  # Overwrite output file
        output_file,
    ]
    # Attempt 2: high-quality re-encode of the video stream
    cmd_encode = [
        'ffmpeg', '-i', input_file,
        '-c:v', 'libx264',
        '-crf', '18',  # High quality (lower = better quality)
        '-preset', 'slow',  # Better compression
        '-c:a', 'copy',  # Copy audio
        '-y',
        output_file,
    ]

    run_options = dict(
        capture_output=True,
        text=True,
        # ffmpeg may echo metadata that is not valid in the locale
        # encoding; replace undecodable bytes rather than raising.
        errors='replace',
        # Keep ffmpeg from reading keystrokes meant for our own prompts.
        stdin=subprocess.DEVNULL,
        timeout=FFMPEG_TIMEOUT_SECONDS,
    )

    try:
        result = subprocess.run(cmd_copy, **run_options)
        if result.returncode == 0:
            print(f" Converted (copy codecs): {input_name} -> {output_name}")
            return True

        # Fallback to high-quality encoding
        print(f" Copy codecs not compatible, using high-quality encoding for {input_name}")
        result = subprocess.run(cmd_encode, **run_options)
    except subprocess.TimeoutExpired:
        print(f" Timeout converting {input_file}", file=sys.stderr)
        return False
    except FileNotFoundError:
        print("Error: ffmpeg not found. Please install ffmpeg.", file=sys.stderr)
        return False
    except OSError as e:
        print(f" Error converting {input_file}: {e}", file=sys.stderr)
        return False

    if result.returncode == 0:
        print(f" Converted (encoded): {input_name} -> {output_name}")
        return True

    print(f" Error converting {input_file}: {result.stderr}", file=sys.stderr)
    return False


def update_xml_paths(xml_file: str, file_mapping: Dict[str, Dict[str, Any]]) -> bool:
    """
    Update XML pathurl tags with new Linux file:// URLs.

    For every <file> element whose <name> is a key of file_mapping, the
    <pathurl> is set (or created) from the mapping's 'local_path', and the
    <name> is replaced when the local file's name differs (e.g. MP4 -> MOV).
    The file is rewritten in place only if at least one element changed.

    Returns True if successful, False otherwise.
    """
    try:
        tree = ET.parse(xml_file)
    except (ET.ParseError, OSError) as e:
        print(f"Error parsing XML file {xml_file}: {e}", file=sys.stderr)
        return False

    updated_count = 0

    # Find all <file> elements and update their pathurl and name if needed
    for file_elem in tree.getroot().iter('file'):
        name_elem = file_elem.find('name')
        if name_elem is None or not name_elem.text:
            continue

        filename = name_elem.text.strip()
        mapping = file_mapping.get(filename)
        if mapping is None:
            continue

        # The URL below is only valid for an absolute path.
        new_path = os.path.abspath(mapping['local_path'])
        new_filename = os.path.basename(new_path)

        # Update name tag if filename changed (e.g., MP4 -> MOV)
        if new_filename != filename:
            name_elem.text = new_filename

        # On Linux, file URLs take the form file:///absolute/path. The
        # absolute path already starts with '/', and quote(..., safe='/')
        # keeps the slashes, so 'file://' + path yields three slashes.
        new_pathurl = f"file://{urllib.parse.quote(new_path, safe='/')}"

        pathurl_elem = file_elem.find('pathurl')
        if pathurl_elem is None:
            # Create pathurl element if it doesn't exist
            pathurl_elem = ET.SubElement(file_elem, 'pathurl')
        pathurl_elem.text = new_pathurl
        updated_count += 1

    if updated_count == 0:
        return True

    # NOTE(review): ElementTree does not keep a <!DOCTYPE ...> line or XML
    # comments when re-serialising; confirm Resolve accepts the output.
    try:
        # Write back to file
        tree.write(xml_file, encoding='UTF-8', xml_declaration=True)
    except Exception as e:
        print(f"Error writing XML file {xml_file}: {e}", file=sys.stderr)
        return False

    print(f" Updated {updated_count} path references in {xml_file}")
    return True


def check_ffmpeg() -> bool:
    """Check if ffmpeg is available (runs `ffmpeg -version` successfully)."""
    try:
        result = subprocess.run(
            ['ffmpeg', '-version'],
            capture_output=True,
            stdin=subprocess.DEVNULL,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing binary as well as a non-executable one.
        return False
    return result.returncode == 0


def main():
    """Main workflow.

    Scans the current directory for XML files, reports which referenced
    video files exist locally, optionally converts MP4 files to MOV, and
    optionally rewrites the XML path references.
    """
    # Get current directory
    current_dir = os.getcwd()
    print(f"Working directory: {current_dir}\n")

    # Find all XML files (sorted so output order is stable)
    xml_files = sorted(f for f in os.listdir(current_dir) if f.lower().endswith('.xml'))

    if not xml_files:
        print("No XML files found in current directory.")
        return

    print(f"Found {len(xml_files)} XML file(s):")
    for xml_file in xml_files:
        print(f" - {xml_file}")
    print()

    # Extract all video references from XML files
    all_video_refs = set()
    xml_to_refs = {}
    for xml_file in xml_files:
        refs = extract_video_references(xml_file)
        all_video_refs.update(refs)
        xml_to_refs[xml_file] = refs
        print(f"Found {len(refs)} video reference(s) in {xml_file}")

    print(f"\nTotal unique video files referenced: {len(all_video_refs)}\n")

    # Find local files
    file_mapping = find_local_files(all_video_refs, current_dir)

    # Separate found and missing files
    found_files = set(file_mapping)
    missing_files = {filename for filename, _ in all_video_refs if filename not in found_files}

    # Show summary
    print("File status:")
    print(f" Found: {len(found_files)}")
    print(f" Missing: {len(missing_files)}")

    if found_files:
        print("\nFound files:")
        for filename in sorted(found_files):
            status = " (needs MP4→MOV conversion)" if file_mapping[filename]['needs_conversion'] else ""
            print(f" - {filename}{status}")

    if missing_files:
        print("\nMissing files:")
        for filename in sorted(missing_files):
            print(f" - {filename}")

    print()

    # Check for MP4 files that need conversion
    mp4_files = {f: m for f, m in file_mapping.items() if m['needs_conversion']}

    # Prompt for conversion
    should_convert = False
    converted_count = 0
    if mp4_files:
        should_convert = _ask_yes_no(f"Convert {len(mp4_files)} MP4 file(s) to MOV? (y/n): ")

        if should_convert and not check_ffmpeg():
            print("Error: ffmpeg not found. Please install ffmpeg to convert files.", file=sys.stderr)
            should_convert = False

        if should_convert:
            print("\nConverting files...")
            for filename, mapping in mp4_files.items():
                # Create output filename with .mov extension
                output_filename = f"{os.path.splitext(filename)[0]}.mov"
                output_path = os.path.join(current_dir, output_filename)

                if convert_mp4_to_mov(mapping['local_path'], output_path):
                    # Update mapping to point to converted file
                    file_mapping[filename]['local_path'] = output_path
                    file_mapping[filename]['needs_conversion'] = False
                    converted_count += 1

            if converted_count:
                print(f"\nSuccessfully converted {converted_count} file(s)")
    else:
        print("No MP4 files found that need conversion.")

    print()

    # Prompt for XML updates
    should_update = _ask_yes_no("Update XML file paths? (y/n): ")
    updated_xml_count = 0

    if should_update:
        print("\nUpdating XML files...")
        for xml_file in xml_files:
            # Only update paths for files referenced in this XML
            referenced = {name for name, _ in xml_to_refs[xml_file]}
            xml_specific_mapping = {
                name: mapping
                for name, mapping in file_mapping.items()
                if name in referenced
            }

            if not xml_specific_mapping:
                print(f" No matching files found for {xml_file}")
                continue

            if update_xml_paths(xml_file, xml_specific_mapping):
                updated_xml_count += 1

    # Final report
    print("\n" + "=" * 60)
    print("Final Report")
    print("=" * 60)

    if missing_files:
        print(f"\nMissing files ({len(missing_files)}):")
        for filename in sorted(missing_files):
            print(f" - {filename}")
    else:
        print("\nAll referenced files were found!")

    if should_convert and mp4_files:
        print(f"\nConverted {converted_count} MP4 file(s) to MOV")

    if should_update:
        # Count only the files that were actually rewritten.
        print(f"\nUpdated {updated_xml_count} XML file(s)")


if __name__ == '__main__':
    main()