#!/usr/bin/env python3
"""
Premiere to Resolve XML Converter

Processes Adobe Premiere Pro XML exports to make them compatible with DaVinci Resolve on Linux.
Updates file paths and optionally converts MP4 files to MOV format.
"""
|
|
|
|
import os
|
|
import sys
|
|
import xml.etree.ElementTree as ET
|
|
import urllib.parse
|
|
import subprocess
|
|
from pathlib import Path
|
|
from typing import Dict, Tuple, Set
|
|
|
|
|
|
def extract_video_references(xml_file: str) -> Set[Tuple[str, str]]:
    """
    Parse XML and extract all unique video file references.

    Every <file> element with a non-empty <name> whose extension is a known
    video extension contributes one entry. A missing or empty <pathurl> is
    reported as an empty string.

    Args:
        xml_file: Path of the XML file to read.

    Returns:
        A set of tuples: (filename, pathurl). The set is empty when the file
        cannot be read or is not well-formed XML; the error is reported on
        stderr in both cases.
    """
    video_extensions = ('.mp4', '.mov', '.avi', '.mxf', '.mts', '.mkv')

    try:
        root = ET.parse(xml_file).getroot()
    except ET.ParseError as e:
        print(f"Error parsing XML file {xml_file}: {e}", file=sys.stderr)
        return set()
    except OSError as e:
        # Unreadable/missing files are treated like malformed ones so that one
        # bad file does not abort the processing of the others.
        print(f"Error reading XML file {xml_file}: {e}", file=sys.stderr)
        return set()

    video_refs = set()

    # Find all <file> elements; bare back-references such as <file id="x"/>
    # have no <name> child and are skipped by the extension test below.
    for file_elem in root.iter('file'):
        filename = (file_elem.findtext('name') or '').strip()

        # Only include video files (check extension)
        if not filename.lower().endswith(video_extensions):
            continue

        pathurl = (file_elem.findtext('pathurl') or '').strip()
        video_refs.add((filename, pathurl))

    return video_refs
|
|
|
|
|
|
def decode_pathurl(pathurl: str) -> str:
    """
    Decode a file:// URL path and return only its filename component.

    The optional 'file://localhost/' or 'file://' prefix is removed, the
    remainder is percent-decoded, and the last path component is returned.
    Both '/' and '\\' are treated as separators, because the URLs come from
    Premiere projects that may have been authored on Windows.

    Example: file://localhost/C%3a/Users/.../file.mp4 -> file.mp4

    Args:
        pathurl: The URL (or plain path) to decode.

    Returns:
        The filename, or an empty string when pathurl is empty, ends with a
        separator, or is not a string.
    """
    if not pathurl:
        return ""

    try:
        path_part = pathurl
        # Longest prefix first; 'file://' also covers the 'file:///' form and
        # leaves the leading '/' of the absolute path in place.
        for prefix in ('file://localhost/', 'file://'):
            if path_part.startswith(prefix):
                path_part = path_part[len(prefix):]
                break

        # URL decode
        decoded = urllib.parse.unquote(path_part)

        # Normalise Windows separators so basename() also splits paths such as
        # C:\Users\me\clip.mp4 when this runs on Linux.
        return os.path.basename(decoded.replace('\\', '/'))
    except (TypeError, AttributeError) as e:
        # Only reachable when a non-string value is passed in.
        print(f"Error decoding pathurl {pathurl}: {e}", file=sys.stderr)
        return ""
|
|
|
|
|
|
def find_local_files(video_refs: Set[Tuple[str, str]], directory: str) -> Dict[str, Dict[str, str]]:
    """
    Match XML references with files in directory (searches recursively).

    A reference is matched by file name only. A file whose name matches with
    identical case is preferred; otherwise a case-insensitive match is used.
    When several files qualify, the first one in sorted path order wins, so
    the result does not depend on directory enumeration order.

    Args:
        video_refs: (filename, pathurl) tuples as read from the XML.
        directory: Root directory to search.

    Returns:
        A dict mapping each matched original filename to:
        {
            'local_path': absolute path of the file that was found,
            'original_pathurl': original pathurl from XML,
            'needs_conversion': True if the referenced name ends in .mp4
        }
        References without a matching file are omitted.
    """
    # Two indexes over the same files: one keyed by the name as stored on
    # disk, one keyed by the lowercased name for the case-insensitive fallback.
    exact_index = {}
    folded_index = {}
    for file_path in sorted(Path(directory).rglob('*')):
        if not file_path.is_file():
            continue
        resolved = str(file_path.resolve())
        exact_index.setdefault(file_path.name, resolved)
        folded_index.setdefault(file_path.name.lower(), resolved)

    file_mapping = {}
    for filename, pathurl in video_refs:
        # Try exact match first, then fall back to ignoring case
        local_path = exact_index.get(filename) or folded_index.get(filename.lower())
        if not local_path:
            continue

        file_mapping[filename] = {
            'local_path': local_path,
            'original_pathurl': pathurl,
            # NOTE: this value is a bool although the annotation says str.
            'needs_conversion': filename.lower().endswith('.mp4'),
        }

    return file_mapping
|
|
|
|
|
|
def convert_mp4_to_mov(input_file: str, output_file: str) -> bool:
    """
    Convert MP4 to MOV using ffmpeg with minimal quality loss.

    Strategy:
    1. Try copy codecs first (lossless if compatible)
    2. Fallback to high-quality encoding

    ffmpeg writes to a temporary sibling of output_file that is moved into
    place only after a successful run. A failed or timed-out conversion
    therefore never leaves a truncated file at output_file and never damages a
    file that already existed there.

    Args:
        input_file: Path of the MP4 file to read.
        output_file: Path of the MOV file to create. If it already exists the
            user is asked on stdin whether to overwrite it.

    Returns:
        True when output_file was produced; False when the user declined to
        overwrite, ffmpeg is missing, a run timed out, or both attempts failed.
    """
    # Check if output file already exists
    if os.path.exists(output_file):
        response = input(f"Output file {output_file} already exists. Overwrite? (y/n): ")
        if response.strip().lower() != 'y':
            return False

    input_name = os.path.basename(input_file)
    output_name = os.path.basename(output_file)

    # Keep the real extension last so ffmpeg still infers the container format.
    stem, ext = os.path.splitext(output_file)
    temp_output = f"{stem}.partial{ext}"

    copy_args = ['-c:v', 'copy', '-c:a', 'copy']
    encode_args = [
        '-c:v', 'libx264',
        '-crf', '18',  # High quality (lower = better quality)
        '-preset', 'slow',  # Better compression
        '-c:a', 'copy',  # Copy audio
    ]

    # Try copy codecs first (lossless if compatible)
    method = "copy codecs"
    result = _run_ffmpeg(input_file, temp_output, copy_args)

    if result is not None and result.returncode != 0:
        # Fallback to high-quality encoding
        print(f" Copy codecs not compatible, using high-quality encoding for {input_name}")
        method = "encoded"
        result = _run_ffmpeg(input_file, temp_output, encode_args)
        if result is not None and result.returncode != 0:
            print(f" Error converting {input_file}: {result.stderr}", file=sys.stderr)

    if result is None or result.returncode != 0:
        _remove_if_exists(temp_output)
        return False

    try:
        os.replace(temp_output, output_file)
    except OSError as e:
        print(f" Error converting {input_file}: {e}", file=sys.stderr)
        _remove_if_exists(temp_output)
        return False

    print(f" Converted ({method}): {input_name} -> {output_name}")
    return True


def _run_ffmpeg(input_file, output_file, codec_args, timeout=3600):
    """Run ffmpeg once; return its CompletedProcess, or None if it could not finish (already reported)."""
    cmd = ['ffmpeg', '-i', input_file, *codec_args, '-y', output_file]
    try:
        return subprocess.run(
            cmd,
            # ffmpeg must not read the terminal the script prompts on.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # ffmpeg may echo metadata that is not valid in the locale encoding.
            errors='replace',
            timeout=timeout,  # 1 hour by default
        )
    except subprocess.TimeoutExpired:
        print(f" Timeout converting {input_file}", file=sys.stderr)
    except FileNotFoundError:
        print("Error: ffmpeg not found. Please install ffmpeg.", file=sys.stderr)
    except (OSError, ValueError) as e:
        print(f" Error converting {input_file}: {e}", file=sys.stderr)
    return None


def _remove_if_exists(path):
    """Delete a leftover temporary file; a missing file is not an error."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup: the conversion failure was already reported.
        pass
|
|
|
|
|
|
def update_xml_paths(xml_file: str, file_mapping: Dict[str, Dict[str, str]]) -> bool:
    """
    Update XML pathurl tags with new Linux file:// URLs.

    For every <file> element whose <name> is a key of file_mapping, <pathurl>
    is set (or created) from the entry's 'local_path', and <name> is changed
    when the local file name differs (e.g., MP4 -> MOV). The file is rewritten
    in place only if at least one element changed. A simple <!DOCTYPE ...>
    line in the original file is carried over, because ElementTree discards
    it when parsing.

    Args:
        xml_file: Path of the XML file to modify.
        file_mapping: Original filename -> dict with at least 'local_path'
            (an absolute path).

    Returns:
        True if successful (including when nothing needed updating),
        False if the file could not be read, parsed or written.
    """
    try:
        tree = ET.parse(xml_file)
    except ET.ParseError as e:
        print(f"Error parsing XML file {xml_file}: {e}", file=sys.stderr)
        return False
    except OSError as e:
        print(f"Error reading XML file {xml_file}: {e}", file=sys.stderr)
        return False

    root = tree.getroot()
    updated_count = 0

    # Find all <file> elements and update their pathurl and name if needed
    for file_elem in root.iter('file'):
        name_elem = file_elem.find('name')
        if name_elem is None or not name_elem.text:
            continue

        filename = name_elem.text.strip()
        mapping = file_mapping.get(filename)
        if mapping is None:
            continue

        new_path = mapping['local_path']
        new_filename = os.path.basename(new_path)

        # Update name tag if filename changed (e.g., MP4 -> MOV)
        if new_filename != filename:
            name_elem.text = new_filename

        # An absolute path starts with '/', so 'file://' + path gives the
        # three-slash form file:///absolute/path. safe='/' keeps the
        # separators unescaped while everything else is percent-encoded.
        new_pathurl = "file://" + urllib.parse.quote(new_path, safe='/')

        pathurl_elem = file_elem.find('pathurl')
        if pathurl_elem is None:
            # Create pathurl element if it doesn't exist
            pathurl_elem = ET.SubElement(file_elem, 'pathurl')
        pathurl_elem.text = new_pathurl
        updated_count += 1

    if updated_count == 0:
        return True

    try:
        # Read the DOCTYPE and serialise fully before opening the file for
        # writing, so a failure here leaves the original file untouched.
        doctype = _read_doctype(xml_file)
        payload = ET.tostring(root, encoding='utf-8')
        with open(xml_file, 'wb') as fh:
            fh.write(b"<?xml version='1.0' encoding='UTF-8'?>\n")
            if doctype:
                fh.write(doctype.encode('utf-8') + b"\n")
            fh.write(payload)
    except (OSError, TypeError, ValueError) as e:
        print(f"Error writing XML file {xml_file}: {e}", file=sys.stderr)
        return False

    print(f" Updated {updated_count} path references in {xml_file}")
    return True


def _read_doctype(xml_file):
    """Return the '<!DOCTYPE ...>' declaration found in the file's prolog, or '' if none can be copied safely."""
    try:
        with open(xml_file, 'r', encoding='utf-8', errors='replace') as fh:
            head = fh.read(4096)
    except OSError:
        return ""

    start = head.find('<!DOCTYPE')
    if start == -1:
        return ""

    # Only the XML declaration, processing instructions and comments may come
    # before a real DOCTYPE; any other '<' means this text is document content.
    prolog = head[:start].replace('<?', '').replace('<!--', '')
    if '<' in prolog:
        return ""

    end = head.find('>', start)
    # A declaration with an internal subset ('[...]') can contain '>' itself;
    # it is skipped rather than risk copying a truncated declaration.
    if end == -1 or '[' in head[start:end]:
        return ""
    return head[start:end + 1]
|
|
|
|
|
|
def check_ffmpeg() -> bool:
    """
    Check if ffmpeg is available.

    Returns:
        True only when 'ffmpeg -version' can be started, finishes within five
        seconds and exits with status 0; False otherwise.
    """
    try:
        result = subprocess.run(
            ['ffmpeg', '-version'],
            stdin=subprocess.DEVNULL,
            capture_output=True,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError covers a missing binary as well as one that is present but
        # cannot be executed (e.g. missing execute permission).
        return False
    return result.returncode == 0
|
|
|
|
|
|
def main():
    """
    Main workflow.

    Runs interactively in the current working directory:
    1. Collect the video references of every *.xml file in the directory.
    2. Locate the referenced files below the directory (recursively).
    3. Optionally convert the MP4 files that were found to MOV.
    4. Optionally rewrite the XML files to point at the local files.
    5. Print a final report.
    """
    # Get current directory
    current_dir = os.getcwd()
    print(f"Working directory: {current_dir}\n")

    # Find all XML files (sorted so the processing order is reproducible)
    xml_files = sorted(f for f in os.listdir(current_dir) if f.lower().endswith('.xml'))

    if not xml_files:
        print("No XML files found in current directory.")
        return

    print(f"Found {len(xml_files)} XML file(s):")
    for xml_file in xml_files:
        print(f" - {xml_file}")
    print()

    # Extract all video references from XML files
    all_video_refs = set()
    xml_to_refs = {}

    for xml_file in xml_files:
        refs = extract_video_references(xml_file)
        all_video_refs.update(refs)
        xml_to_refs[xml_file] = refs
        print(f"Found {len(refs)} video reference(s) in {xml_file}")

    print(f"\nTotal unique video files referenced: {len(all_video_refs)}\n")

    # Find local files
    file_mapping = find_local_files(all_video_refs, current_dir)

    # Separate found and missing files
    found_files = set(file_mapping)
    missing_files = {filename for filename, _ in all_video_refs if filename not in found_files}

    # Show summary
    print("File status:")
    print(f" Found: {len(found_files)}")
    print(f" Missing: {len(missing_files)}")

    if found_files:
        print("\nFound files:")
        for filename in sorted(found_files):
            mapping = file_mapping[filename]
            status = " (needs MP4→MOV conversion)" if mapping['needs_conversion'] else ""
            print(f" - {filename}{status}")

    if missing_files:
        print("\nMissing files:")
        for filename in sorted(missing_files):
            print(f" - {filename}")

    print()

    # Check for MP4 files that need conversion. The values are the same dict
    # objects as in file_mapping, so updates below are visible through both.
    mp4_files = {f: m for f, m in file_mapping.items() if m['needs_conversion']}

    # Prompt for conversion
    should_convert = False
    converted_count = 0
    if mp4_files:
        response = input(f"Convert {len(mp4_files)} MP4 file(s) to MOV? (y/n): ")
        should_convert = response.strip().lower() == 'y'

        if should_convert:
            if not check_ffmpeg():
                print("Error: ffmpeg not found. Please install ffmpeg to convert files.", file=sys.stderr)
                should_convert = False
            else:
                print("\nConverting files...")

                for filename, mapping in mp4_files.items():
                    input_path = mapping['local_path']
                    # Create output filename with .mov extension in the same directory as input
                    input_dir = os.path.dirname(input_path)
                    base_name = os.path.splitext(filename)[0]
                    output_path = os.path.join(input_dir, f"{base_name}.mov")

                    if convert_mp4_to_mov(input_path, output_path):
                        # Update mapping to point to converted file
                        mapping['local_path'] = output_path
                        mapping['needs_conversion'] = False
                        converted_count += 1

                if converted_count:
                    print(f"\nSuccessfully converted {converted_count} file(s)")
    else:
        print("No MP4 files found that need conversion.")

    print()

    # Prompt for XML updates
    response = input("Update XML file paths? (y/n): ")
    should_update = response.strip().lower() == 'y'

    updated_xml_count = 0
    if should_update:
        print("\nUpdating XML files...")
        for xml_file in xml_files:
            # Only update paths for files referenced in this XML
            referenced_names = {name for name, _ in xml_to_refs[xml_file]}
            xml_specific_mapping = {
                filename: mapping
                for filename, mapping in file_mapping.items()
                if filename in referenced_names
            }

            if xml_specific_mapping:
                if update_xml_paths(xml_file, xml_specific_mapping):
                    updated_xml_count += 1
            else:
                print(f" No matching files found for {xml_file}")

    # Final report
    print("\n" + "="*60)
    print("Final Report")
    print("="*60)

    if missing_files:
        print(f"\nMissing files ({len(missing_files)}):")
        for filename in sorted(missing_files):
            print(f" - {filename}")
    else:
        print("\nAll referenced files were found!")

    if should_convert and mp4_files:
        print(f"\nConverted {converted_count} MP4 file(s) to MOV")

    if should_update:
        # Count only the files that were actually processed successfully, not
        # every XML file in the directory.
        print(f"\nUpdated {updated_xml_count} XML file(s)")
|
|
|
|
|
|
# Run the interactive workflow only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|