Files
prem2resolve/premiere_to_resolve.py

405 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Premiere to Resolve XML Converter
Processes Adobe Premiere Pro XML exports to make them compatible with DaVinci Resolve on Linux.
Updates file paths and optionally converts MP4 files to MOV format.
"""
import os
import sys
import xml.etree.ElementTree as ET
import urllib.parse
import subprocess
from pathlib import Path
from typing import Dict, Tuple, Set
def extract_video_references(xml_file: str) -> Set[Tuple[str, str]]:
    """
    Parse XML and extract all unique video file references.

    Every <file> element that has a non-empty <name> child is inspected. It is
    kept when the (stripped) name ends with one of the recognised video
    extensions, compared case-insensitively. A missing or empty <pathurl>
    child is reported as an empty string.

    Args:
        xml_file: Path of the XML file to read.

    Returns:
        A set of tuples: (filename, pathurl). The set is empty when the file
        cannot be read or parsed; the error is printed to stderr.
    """
    video_extensions = ('.mp4', '.mov', '.avi', '.mxf', '.mts', '.mkv')

    try:
        root = ET.parse(xml_file).getroot()
    except ET.ParseError as e:
        print(f"Error parsing XML file {xml_file}: {e}", file=sys.stderr)
        return set()
    except OSError as e:
        # Missing/unreadable file, or a directory whose name ends in ".xml".
        print(f"Error reading XML file {xml_file}: {e}", file=sys.stderr)
        return set()

    video_refs = set()
    # Find all <file> elements that carry a <name>; <pathurl> is optional
    for file_elem in root.iter('file'):
        filename = (file_elem.findtext('name') or '').strip()
        # Only include video files (check extension)
        if not filename.lower().endswith(video_extensions):
            continue
        pathurl = (file_elem.findtext('pathurl') or '').strip()
        video_refs.add((filename, pathurl))
    return video_refs
def decode_pathurl(pathurl: str) -> str:
    """
    Return the percent-decoded filename component of a file:// URL.

    Example: file://localhost/C%3a/Users/.../file.mp4 -> file.mp4

    A leading 'file://localhost/' or 'file://' is stripped when present; any
    other input is treated as a plain path. For 'file:///...' URLs only the
    first two slashes are removed, which does not matter because only the
    basename is returned. An empty input yields "". If decoding raises, the
    error is printed to stderr and "" is returned.
    """
    if not pathurl:
        return ""

    try:
        # Longest prefix first so 'file://localhost/' is not cut as 'file://'
        for scheme_prefix in ('file://localhost/', 'file://'):
            if pathurl.startswith(scheme_prefix):
                remainder = pathurl[len(scheme_prefix):]
                break
        else:
            remainder = pathurl

        # URL decode, then keep only the final path component
        return os.path.basename(urllib.parse.unquote(remainder))
    except Exception as e:
        print(f"Error decoding pathurl {pathurl}: {e}", file=sys.stderr)
        return ""
def find_local_files(video_refs: Set[Tuple[str, str]], directory: str) -> Dict[str, Dict[str, str]]:
    """
    Match XML references with files in directory (searches recursively).

    A reference is matched by its exact filename first; only when no file has
    exactly that name is a case-insensitive match used. When several files
    share a name, the first one in sorted path order is chosen so the result
    does not depend on directory traversal order.

    Args:
        video_refs: Set of (filename, pathurl) tuples taken from the XML.
        directory: Root directory to search.

    Returns a dict mapping original filename to:
    {
        'local_path': actual (resolved) path found,
        'original_pathurl': original pathurl from XML,
        'needs_conversion': True if the referenced name ends in .mp4
    }
    References with no matching file are omitted from the dict.
    """
    exact_index: Dict[str, str] = {}
    folded_index: Dict[str, str] = {}

    # Index every regular file twice: by its real name and by its lower-cased name
    for file_path in sorted(Path(directory).rglob('*')):
        if not file_path.is_file():
            continue
        resolved = str(file_path.resolve())
        # setdefault keeps the first (sorted) hit instead of the last one seen
        exact_index.setdefault(file_path.name, resolved)
        folded_index.setdefault(file_path.name.lower(), resolved)

    file_mapping = {}
    for filename, pathurl in video_refs:
        # Try exact match first, then fall back to case-insensitive
        local_path = exact_index.get(filename) or folded_index.get(filename.lower())
        if not local_path:
            continue
        file_mapping[filename] = {
            'local_path': local_path,
            'original_pathurl': pathurl,
            'needs_conversion': filename.lower().endswith('.mp4'),
        }
    return file_mapping
def _run_ffmpeg_step(codec_args, input_file: str, output_file: str):
    """Run one ffmpeg attempt; return its CompletedProcess, or None if it could not run."""
    cmd = ['ffmpeg', '-i', input_file, *codec_args, '-y', output_file]
    try:
        return subprocess.run(
            cmd,
            # Keep ffmpeg away from our stdin so it cannot swallow the
            # answers meant for this script's own input() prompts.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # ffmpeg output is not guaranteed to be valid in the locale encoding
            errors='replace',
            timeout=3600  # 1 hour timeout
        )
    except subprocess.TimeoutExpired:
        print(f" Timeout converting {input_file}", file=sys.stderr)
    except FileNotFoundError:
        print("Error: ffmpeg not found. Please install ffmpeg.", file=sys.stderr)
    except OSError as e:
        print(f" Error converting {input_file}: {e}", file=sys.stderr)
    return None


def convert_mp4_to_mov(input_file: str, output_file: str) -> bool:
    """
    Convert MP4 to MOV using ffmpeg with minimal quality loss.

    Strategy:
    1. Try copy codecs first (lossless if compatible)
    2. Fallback to high-quality encoding (libx264, CRF 18, preset slow,
       audio copied) when ffmpeg exits non-zero on the copy attempt

    If output_file already exists the user is asked whether to overwrite it;
    any answer other than 'y' aborts without running ffmpeg.

    Args:
        input_file: Path of the source file.
        output_file: Path of the file to create.

    Returns:
        True when ffmpeg exited with status 0 on either attempt, False when
        the user declined to overwrite, ffmpeg is missing, timed out, or both
        attempts failed. Failures are reported on stderr. An output file that
        did not exist before the call is removed again on failure.
    """
    in_name = os.path.basename(input_file)
    out_name = os.path.basename(output_file)

    # Check if output file already exists
    output_preexisting = os.path.exists(output_file)
    if output_preexisting:
        response = input(f"Output file {output_file} already exists. Overwrite? (y/n): ")
        if response.lower() != 'y':
            return False

    copy_args = ['-c:v', 'copy', '-c:a', 'copy']
    encode_args = [
        '-c:v', 'libx264',
        '-crf', '18',        # High quality (lower = better quality)
        '-preset', 'slow',   # Better compression
        '-c:a', 'copy',      # Copy audio
    ]

    # Try copy codecs first (lossless if compatible)
    result = _run_ffmpeg_step(copy_args, input_file, output_file)
    if result is not None and result.returncode == 0:
        print(f" Converted (copy codecs): {in_name} -> {out_name}")
        return True

    # Only fall back when ffmpeg actually ran and rejected the stream copy;
    # a timeout or a missing binary would fail the same way again.
    if result is not None:
        print(f" Copy codecs not compatible, using high-quality encoding for {in_name}")
        result = _run_ffmpeg_step(encode_args, input_file, output_file)
        if result is not None:
            if result.returncode == 0:
                print(f" Converted (encoded): {in_name} -> {out_name}")
                return True
            print(f" Error converting {input_file}: {result.stderr}", file=sys.stderr)

    # Do not leave a truncated output behind; never delete a file that was
    # already there before this call.
    if not output_preexisting and os.path.exists(output_file):
        try:
            os.remove(output_file)
        except OSError:
            pass  # best-effort cleanup only
    return False
def _read_doctype(xml_file: str) -> str:
    """Return the <!DOCTYPE ...> declaration found before the root element, or ""."""
    import re

    try:
        with open(xml_file, 'rb') as handle:
            head = handle.read(65536).decode('utf-8', errors='replace')
    except OSError:
        return ""
    # The prolog ends where the first real element tag starts
    root_start = re.search(r'<[A-Za-z_]', head)
    prolog = head[:root_start.start()] if root_start else head
    match = re.search(r'<!DOCTYPE\s[^\[>]*(?:\[.*?\]\s*)?>', prolog, re.DOTALL)
    return match.group(0) if match else ""


def update_xml_paths(xml_file: str, file_mapping: Dict[str, Dict[str, str]]) -> bool:
    """
    Update XML pathurl tags with new Linux file:// URLs.

    For every <file> element whose (stripped) <name> is a key of file_mapping:
    the <name> text is replaced when the mapped file has a different basename
    (e.g., MP4 -> MOV), and <pathurl> is set (or created) to the percent-encoded
    file:// URL of the mapped 'local_path'. The file is rewritten in place only
    when at least one element was changed; an existing <!DOCTYPE ...>
    declaration is written back because ElementTree does not retain it.

    Args:
        xml_file: Path of the XML file to modify.
        file_mapping: Maps an XML filename to a dict with a 'local_path' entry.

    Returns True if successful (including when nothing needed updating),
    False if the file could not be read, parsed or written.
    """
    try:
        tree = ET.parse(xml_file)
    except ET.ParseError as e:
        print(f"Error parsing XML file {xml_file}: {e}", file=sys.stderr)
        return False
    except OSError as e:
        print(f"Error reading XML file {xml_file}: {e}", file=sys.stderr)
        return False

    updated_count = 0
    # Find all <file> elements and update their pathurl and name if needed
    for file_elem in tree.getroot().iter('file'):
        name_elem = file_elem.find('name')
        if name_elem is None or not name_elem.text:
            continue
        filename = name_elem.text.strip()
        mapping = file_mapping.get(filename)
        if mapping is None:
            continue

        # An absolute path starts with '/', so "file://" + path yields the
        # three-slash form file:///absolute/path
        new_path = os.path.abspath(mapping['local_path'])

        # Update name tag if filename changed (e.g., MP4 -> MOV)
        new_filename = os.path.basename(new_path)
        if new_filename != filename:
            name_elem.text = new_filename

        # Percent-encode everything except the path separators
        new_pathurl = f"file://{urllib.parse.quote(new_path, safe='/')}"

        pathurl_elem = file_elem.find('pathurl')
        if pathurl_elem is None:
            # Create pathurl element if it doesn't exist
            pathurl_elem = ET.SubElement(file_elem, 'pathurl')
        pathurl_elem.text = new_pathurl
        updated_count += 1

    if updated_count == 0:
        return True

    # Must be read before the file is opened for writing (which truncates it)
    doctype = _read_doctype(xml_file)
    try:
        # Write back to file: declaration, preserved DOCTYPE, then the tree
        with open(xml_file, 'wb') as out:
            out.write(b"<?xml version='1.0' encoding='UTF-8'?>\n")
            if doctype:
                out.write(doctype.encode('utf-8') + b"\n")
            tree.write(out, encoding='UTF-8', xml_declaration=False)
    except Exception as e:
        print(f"Error writing XML file {xml_file}: {e}", file=sys.stderr)
        return False

    print(f" Updated {updated_count} path references in {xml_file}")
    return True
def check_ffmpeg() -> bool:
    """
    Report whether an `ffmpeg` executable can be launched.

    Runs `ffmpeg -version` with captured output and a 5 second timeout.
    Returns False when the executable is not found or the probe times out,
    True otherwise (the exit status of the probe is not inspected).
    """
    probe_cmd = ['ffmpeg', '-version']
    try:
        subprocess.run(probe_cmd, capture_output=True, timeout=5)
    except FileNotFoundError:
        return False
    except subprocess.TimeoutExpired:
        return False
    else:
        return True
def _ask_yes_no(prompt: str) -> bool:
    """Return True only when the user answers 'y' (surrounding whitespace and case ignored)."""
    try:
        answer = input(prompt)
    except EOFError:
        # stdin is closed or exhausted: treat it as a "no" instead of crashing
        print()
        return False
    return answer.strip().lower() == 'y'


def main():
    """
    Main workflow.

    1. Collect the XML files in the current working directory.
    2. Extract the video references of each XML and look the files up
       (recursively) below the working directory.
    3. Print which referenced files were found or are missing.
    4. Optionally convert the found MP4 files to MOV next to the originals.
    5. Optionally rewrite the path references in each XML.
    6. Print a final report.
    """
    # Get current directory
    current_dir = os.getcwd()
    print(f"Working directory: {current_dir}\n")

    # Find all XML files (os.listdir also returns directories, so filter them
    # out; sorted so the processing order is stable between runs)
    xml_files = sorted(
        f for f in os.listdir(current_dir)
        if f.lower().endswith('.xml') and os.path.isfile(os.path.join(current_dir, f))
    )
    if not xml_files:
        print("No XML files found in current directory.")
        return

    print(f"Found {len(xml_files)} XML file(s):")
    for xml_file in xml_files:
        print(f" - {xml_file}")
    print()

    # Extract all video references from XML files
    all_video_refs = set()
    xml_to_names = {}  # XML file -> set of filenames it references
    for xml_file in xml_files:
        refs = extract_video_references(xml_file)
        all_video_refs.update(refs)
        xml_to_names[xml_file] = {name for name, _ in refs}
        print(f"Found {len(refs)} video reference(s) in {xml_file}")
    print(f"\nTotal unique video files referenced: {len(all_video_refs)}\n")

    # Find local files
    file_mapping = find_local_files(all_video_refs, current_dir)

    # Separate found and missing files
    found_files = set(file_mapping)
    missing_files = {filename for filename, _ in all_video_refs if filename not in found_files}

    # Show summary
    print("File status:")
    print(f" Found: {len(found_files)}")
    print(f" Missing: {len(missing_files)}")
    if found_files:
        print("\nFound files:")
        for filename in sorted(found_files):
            status = " (needs MP4→MOV conversion)" if file_mapping[filename]['needs_conversion'] else ""
            print(f" - {filename}{status}")
    if missing_files:
        print("\nMissing files:")
        for filename in sorted(missing_files):
            print(f" - {filename}")
    print()

    # Check for MP4 files that need conversion
    mp4_files = {f: m for f, m in file_mapping.items() if m['needs_conversion']}

    # Prompt for conversion
    should_convert = False
    conversion_map = {}  # original filename -> converted filename
    if mp4_files:
        should_convert = _ask_yes_no(f"Convert {len(mp4_files)} MP4 file(s) to MOV? (y/n): ")
        if should_convert:
            if not check_ffmpeg():
                print("Error: ffmpeg not found. Please install ffmpeg to convert files.", file=sys.stderr)
                should_convert = False
            else:
                print("\nConverting files...")
                for filename, mapping in mp4_files.items():
                    input_path = mapping['local_path']
                    # Create output filename with .mov extension in the same directory as input
                    output_filename = f"{os.path.splitext(filename)[0]}.mov"
                    output_path = os.path.join(os.path.dirname(input_path), output_filename)
                    if convert_mp4_to_mov(input_path, output_path):
                        # Point the mapping at the converted file so the XML
                        # update below references the MOV instead of the MP4
                        conversion_map[filename] = output_filename
                        mapping['local_path'] = output_path
                        mapping['needs_conversion'] = False
                if conversion_map:
                    print(f"\nSuccessfully converted {len(conversion_map)} file(s)")
    else:
        print("No MP4 files found that need conversion.")
    print()

    # Prompt for XML updates
    should_update = _ask_yes_no("Update XML file paths? (y/n): ")
    updated_xml_count = 0
    if should_update:
        print("\nUpdating XML files...")
        for xml_file in xml_files:
            # Only update paths for files referenced in this XML
            referenced = xml_to_names[xml_file]
            xml_specific_mapping = {
                filename: mapping
                for filename, mapping in file_mapping.items()
                if filename in referenced
            }
            if not xml_specific_mapping:
                print(f" No matching files found for {xml_file}")
            elif update_xml_paths(xml_file, xml_specific_mapping):
                updated_xml_count += 1

    # Final report
    print("\n" + "="*60)
    print("Final Report")
    print("="*60)
    if missing_files:
        print(f"\nMissing files ({len(missing_files)}):")
        for filename in sorted(missing_files):
            print(f" - {filename}")
    else:
        print("\nAll referenced files were found!")
    if should_convert and mp4_files:
        print(f"\nConverted {len(conversion_map)} MP4 file(s) to MOV")
    if should_update:
        # Count only the XML files that were actually rewritten successfully
        print(f"\nUpdated {updated_xml_count} XML file(s)")
# Run the interactive workflow only when executed as a script, not on import.
if __name__ == '__main__':
    main()