441 lines
15 KiB
Python
441 lines
15 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Premiere to Resolve XML Converter
|
||
|
||
Processes Adobe Premiere Pro XML exports to make them compatible with DaVinci Resolve on Linux.
|
||
Updates file paths and optionally converts MP4 files to MOV by re-encoding to Resolve-friendly
|
||
codecs (ProRes 422 HQ, DNxHD, or H.264 in MOV). Stream copy is not used so Resolve can read
|
||
the resulting files on Linux.
|
||
"""
|
||
|
||
import os
|
||
import sys
|
||
import xml.etree.ElementTree as ET
|
||
import urllib.parse
|
||
import subprocess
|
||
from pathlib import Path
|
||
from typing import Dict, Tuple, Set
|
||
|
||
|
||
def extract_video_references(xml_file: str) -> Set[Tuple[str, str]]:
    """
    Parse an XML export and collect every unique video file reference.

    Every ``<file>`` element anywhere in the tree is inspected. An element
    contributes an entry when its direct ``<name>`` child has non-empty text
    whose (case-insensitive) extension is one of the known video extensions.

    Args:
        xml_file: Path of the XML file to read.

    Returns:
        A set of ``(filename, pathurl)`` tuples, both stripped of surrounding
        whitespace. ``pathurl`` is ``""`` when the element has no ``<pathurl>``
        child or that child is empty. If the file cannot be read or parsed, an
        error is printed to stderr and an empty set is returned.
    """
    video_extensions = ('.mp4', '.mov', '.avi', '.mxf', '.mts', '.mkv')

    try:
        root = ET.parse(xml_file).getroot()
    except (ET.ParseError, OSError) as e:
        # OSError covers a missing/unreadable file so that one bad XML does
        # not abort processing of the others.
        print(f"Error parsing XML file {xml_file}: {e}", file=sys.stderr)
        return set()

    video_refs = set()

    for file_elem in root.iter('file'):
        name_elem = file_elem.find('name')
        if name_elem is None or not name_elem.text:
            # Elements without a name (e.g. empty <file id="..."/> stubs)
            # carry nothing to match against.
            continue

        filename = name_elem.text.strip()
        if not filename.lower().endswith(video_extensions):
            continue

        pathurl_elem = file_elem.find('pathurl')
        pathurl = ""
        if pathurl_elem is not None and pathurl_elem.text:
            pathurl = pathurl_elem.text.strip()

        video_refs.add((filename, pathurl))

    return video_refs
|
||
|
||
|
||
def decode_pathurl(pathurl: str) -> str:
    """
    Decode a ``file://`` URL (or plain path) and return only its filename.

    The ``file://localhost/`` or ``file://`` prefix is removed when present,
    percent-escapes are decoded, and the last path component is returned.
    Both ``/`` and ``\\`` are treated as separators, so percent-encoded
    Windows paths resolve to their filename on Linux as well.

    Example: file://localhost/C%3a/Users/.../file.mp4 -> file.mp4

    Args:
        pathurl: The URL or path text taken from a ``<pathurl>`` element.

    Returns:
        The decoded filename, or ``""`` when ``pathurl`` is empty or cannot
        be decoded (an error is printed to stderr in the latter case).
    """
    if not pathurl:
        return ""

    try:
        # Longest prefix first. 'file://' also covers 'file:///...': the
        # third slash is the start of the path itself and is kept.
        path_part = pathurl
        for prefix in ('file://localhost/', 'file://'):
            if pathurl.startswith(prefix):
                path_part = pathurl[len(prefix):]
                break

        decoded = urllib.parse.unquote(path_part)

        # os.path.basename only splits on '/' on POSIX; normalise backslashes
        # so a decoded Windows path does not come back whole.
        return os.path.basename(decoded.replace('\\', '/'))
    except (AttributeError, TypeError, ValueError) as e:
        # Reached only for non-string input; reported rather than raised.
        print(f"Error decoding pathurl {pathurl}: {e}", file=sys.stderr)
        return ""
|
||
|
||
|
||
def find_local_files(video_refs: Set[Tuple[str, str]], directory: str) -> Dict[str, Dict[str, str]]:
    """
    Match XML references with files in directory (searches recursively).

    A reference is matched by filename only: first against an exact-case
    index, then against a lowercased index. When several files share a
    name, the one closest to ``directory`` wins (ties broken by path order),
    so the result does not depend on filesystem enumeration order.

    Args:
        video_refs: ``(filename, pathurl)`` tuples taken from the XML.
        directory: Root directory to search.

    Returns:
        A dict mapping each matched XML filename to:
        {
            'local_path': resolved absolute path of the file found,
            'original_pathurl': original pathurl from XML,
            'needs_conversion': True if the XML filename ends in .mp4
        }
        References with no matching file are omitted.
    """
    directory_path = Path(directory)

    # Shallowest files first, so setdefault() below keeps the file nearest
    # the search root whenever a name occurs more than once.
    candidates = sorted(
        (p for p in directory_path.rglob('*') if p.is_file()),
        key=lambda p: (len(p.parts), str(p)),
    )

    exact_index = {}   # filename as on disk -> resolved path
    folded_index = {}  # lowercased filename -> resolved path
    for file_path in candidates:
        resolved = str(file_path.resolve())
        exact_index.setdefault(file_path.name, resolved)
        folded_index.setdefault(file_path.name.lower(), resolved)

    file_mapping = {}
    for filename, pathurl in video_refs:
        # Prefer a true exact-case hit; fall back to case-insensitive.
        local_path = exact_index.get(filename) or folded_index.get(filename.lower())
        if not local_path:
            continue

        file_mapping[filename] = {
            'local_path': local_path,
            'original_pathurl': pathurl,
            'needs_conversion': filename.lower().endswith('.mp4'),
        }

    return file_mapping
|
||
|
||
|
||
def _run_ffmpeg_encode(input_file: str, output_file: str, video_args):
    """Run one ffmpeg encode attempt (PCM audio) and return the finished process."""
    command = [
        "ffmpeg",
        "-i",
        input_file,
        *video_args,
        "-c:a",
        "pcm_s16le",  # Uncompressed audio
        "-y",
        output_file,
    ]
    return subprocess.run(
        command,
        # Keep ffmpeg away from our stdin so it cannot consume answers the
        # user types for later input() prompts.
        stdin=subprocess.DEVNULL,
        capture_output=True,
        text=True,
        errors="replace",  # stderr may contain bytes that are not valid text
        timeout=3600,
    )


def _remove_partial_output(output_file: str) -> None:
    """Best-effort removal of an output file left behind by a failed encode."""
    try:
        os.remove(output_file)
    except OSError:
        # Nothing was written, or it cannot be removed; the conversion
        # failure itself has already been reported to the caller.
        pass


def convert_mp4_to_mov(input_file: str, output_file: str) -> bool:
    """
    Convert MP4 to MOV by re-encoding to a codec DaVinci Resolve on Linux supports.

    Video is always re-encoded (never stream-copied). Three encoders are
    tried in order until one succeeds: ProRes 422 HQ, then DNxHD at 120 Mbps,
    then H.264 (libx264) in MOV. Audio is written as 16-bit PCM in all cases.

    If ``output_file`` already exists the user is asked on stdin whether to
    overwrite it; anything other than ``y`` aborts the conversion.

    Args:
        input_file: Path of the source MP4.
        output_file: Path of the MOV file to create.

    Returns:
        True if one of the encodes succeeded; False if the user declined to
        overwrite, ffmpeg is unavailable, an attempt timed out or could not
        be started, or every encoder failed.
    """
    output_existed = os.path.exists(output_file)
    if output_existed:
        response = input(f"Output file {output_file} already exists. Overwrite? (y/n): ")
        if response.strip().lower() != 'y':
            return False

    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdin=subprocess.DEVNULL,
            capture_output=True,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        print("Error: ffmpeg not found. Please install ffmpeg.", file=sys.stderr)
        return False

    in_name = os.path.basename(input_file)
    out_name = os.path.basename(output_file)

    # (label for the success message, notice printed before the attempt,
    #  video encoder arguments)
    attempts = (
        (
            "ProRes 422 HQ",
            None,
            # ffmpeg's ProRes profiles: 0=proxy, 1=lt, 2=standard, 3=hq.
            ["-c:v", "prores", "-profile:v", "3"],
        ),
        (
            "DNxHD",
            " ProRes failed, trying DNxHD for",
            # DNxHD 120 Mbps (1080p). NOTE(review): the dnxhd encoder only
            # accepts specific resolution/frame-rate/bitrate combinations,
            # so this step is expected to fail for other sources.
            ["-c:v", "dnxhd", "-b:v", "120M"],
        ),
        (
            "H.264 MOV",
            " DNxHD failed, using H.264 in MOV for",
            # Last resort: re-encoded H.264; yuv420p for maximum compatibility.
            ["-c:v", "libx264", "-crf", "18", "-preset", "slow", "-pix_fmt", "yuv420p"],
        ),
    )

    def _cleanup():
        # Only delete what this call created: a pre-existing file may not
        # have been touched at all if ffmpeg failed before opening it.
        if not output_existed:
            _remove_partial_output(output_file)

    result = None
    for label, notice, video_args in attempts:
        if notice is not None:
            print(f"{notice} {in_name}")
        try:
            result = _run_ffmpeg_encode(input_file, output_file, video_args)
        except subprocess.TimeoutExpired:
            print(f" Timeout converting {input_file}", file=sys.stderr)
            _cleanup()
            return False
        except (OSError, subprocess.SubprocessError) as e:
            print(f" Error converting {input_file}: {e}", file=sys.stderr)
            _cleanup()
            return False

        if result.returncode == 0:
            print(f" Converted ({label}): {in_name} -> {out_name}")
            return True

    # Every encoder failed; report the output of the final attempt.
    print(f" Error converting {input_file}: {result.stderr}", file=sys.stderr)
    _cleanup()
    return False
|
||
|
||
|
||
def _read_doctype(xml_file: str) -> bytes:
    """Return the simple ``<!DOCTYPE ...>`` declaration near the top of the file, or b""."""
    try:
        with open(xml_file, 'rb') as fh:
            head = fh.read(4096)
    except OSError:
        return b""

    start = head.find(b'<!DOCTYPE')
    if start == -1:
        return b""
    end = head.find(b'>', start)
    if end == -1:
        return b""
    declaration = head[start:end + 1]
    # A DOCTYPE with an internal subset ("[ ... ]") contains nested '>' and
    # is not handled by this simple scan; leave such files to ElementTree.
    if b'[' in declaration:
        return b""
    return declaration


def update_xml_paths(xml_file: str, file_mapping: Dict[str, Dict[str, str]]) -> bool:
    """
    Update XML pathurl tags with new Linux file:// URLs.

    For every ``<file>`` element whose ``<name>`` text (stripped) is a key of
    ``file_mapping``, the ``<pathurl>`` child is set (or created) to a
    ``file:///absolute/path`` URL built from the mapping's ``'local_path'``,
    and ``<name>`` is replaced with that path's basename when it differs
    (e.g. after MP4 -> MOV conversion). The file is rewritten in place only
    when at least one element was updated.

    Args:
        xml_file: Path of the XML file to modify.
        file_mapping: Maps XML filenames to dicts containing ``'local_path'``.

    Returns:
        True if successful (including when nothing needed updating),
        False if the file could not be read, parsed or written.
    """
    try:
        tree = ET.parse(xml_file)
        root = tree.getroot()
    except (ET.ParseError, OSError) as e:
        print(f"Error parsing XML file {xml_file}: {e}", file=sys.stderr)
        return False

    updated_count = 0

    for file_elem in root.iter('file'):
        name_elem = file_elem.find('name')
        if name_elem is None or not name_elem.text:
            continue

        filename = name_elem.text.strip()
        if filename not in file_mapping:
            continue

        new_path = file_mapping[filename]['local_path']
        new_filename = os.path.basename(new_path)

        # Update name tag if filename changed (e.g., MP4 -> MOV)
        if new_filename != filename:
            name_elem.text = new_filename

        # An absolute POSIX path starts with '/', so 'file://' + path yields
        # the three-slash form file:///abs/path. abspath() guards against a
        # relative path producing 'file://rel/...', where 'rel' would be
        # read as a host name. Slashes are kept; everything else is escaped.
        encoded_path = urllib.parse.quote(os.path.abspath(new_path), safe='/')
        new_pathurl = f"file://{encoded_path}"

        pathurl_elem = file_elem.find('pathurl')
        if pathurl_elem is None:
            # Create pathurl element if it doesn't exist
            pathurl_elem = ET.SubElement(file_elem, 'pathurl')
        pathurl_elem.text = new_pathurl
        updated_count += 1

    if updated_count == 0:
        return True

    # ElementTree does not keep the document type declaration, so capture it
    # from the original file before overwriting.
    doctype = _read_doctype(xml_file)

    try:
        if doctype:
            body = ET.tostring(root, encoding='UTF-8')
            with open(xml_file, 'wb') as fh:
                fh.write(b"<?xml version='1.0' encoding='UTF-8'?>\n")
                fh.write(doctype + b"\n")
                fh.write(body)
        else:
            tree.write(xml_file, encoding='UTF-8', xml_declaration=True)
    except OSError as e:
        print(f"Error writing XML file {xml_file}: {e}", file=sys.stderr)
        return False

    print(f" Updated {updated_count} path references in {xml_file}")
    return True
|
||
|
||
|
||
def check_ffmpeg() -> bool:
    """
    Check if ffmpeg is available.

    Runs ``ffmpeg -version`` with a 5 second timeout.

    Returns:
        True only if the command could be started and exited with status 0;
        False if it is missing, cannot be executed, times out, or fails.
    """
    try:
        result = subprocess.run(
            ['ffmpeg', '-version'],
            stdin=subprocess.DEVNULL,
            capture_output=True,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        # OSError includes FileNotFoundError (not installed) as well as
        # PermissionError (present but not executable).
        return False

    # A binary that starts but exits non-zero is not usable either.
    return result.returncode == 0
|
||
|
||
|
||
def main():
    """
    Main workflow.

    Operates on the current working directory:
      1. Lists the XML files found directly in it.
      2. Collects the video references of each XML file.
      3. Matches those references against files below the directory and
         prints which were found and which are missing.
      4. Optionally (after a y/n prompt) converts matched MP4 files to MOV
         next to the original file.
      5. Optionally (after a y/n prompt) rewrites the paths in each XML file.
      6. Prints a final report.
    """
    current_dir = os.getcwd()
    print(f"Working directory: {current_dir}\n")

    # Regular files only: a directory whose name ends in .xml cannot be parsed.
    xml_files = sorted(
        f for f in os.listdir(current_dir)
        if f.lower().endswith('.xml') and os.path.isfile(os.path.join(current_dir, f))
    )

    if not xml_files:
        print("No XML files found in current directory.")
        return

    print(f"Found {len(xml_files)} XML file(s):")
    for xml_file in xml_files:
        print(f" - {xml_file}")
    print()

    # Extract all video references from XML files
    all_video_refs = set()
    xml_to_refs = {}

    for xml_file in xml_files:
        refs = extract_video_references(xml_file)
        all_video_refs.update(refs)
        xml_to_refs[xml_file] = refs
        print(f"Found {len(refs)} video reference(s) in {xml_file}")

    print(f"\nTotal unique video files referenced: {len(all_video_refs)}\n")

    # Find local files
    file_mapping = find_local_files(all_video_refs, current_dir)

    # Separate found and missing files
    found_files = set(file_mapping)
    missing_files = {filename for filename, _ in all_video_refs if filename not in found_files}

    # Show summary
    print("File status:")
    print(f" Found: {len(found_files)}")
    print(f" Missing: {len(missing_files)}")

    if found_files:
        print("\nFound files:")
        for filename in sorted(found_files):
            mapping = file_mapping[filename]
            status = " (needs MP4→MOV conversion)" if mapping['needs_conversion'] else ""
            print(f" - {filename}{status}")

    if missing_files:
        print("\nMissing files:")
        for filename in sorted(missing_files):
            print(f" - {filename}")

    print()

    # Check for MP4 files that need conversion
    mp4_files = {f: m for f, m in file_mapping.items() if m['needs_conversion']}

    # Prompt for conversion
    should_convert = False
    converted_count = 0
    if mp4_files:
        response = input(f"Convert {len(mp4_files)} MP4 file(s) to MOV? (y/n): ")
        should_convert = response.strip().lower() == 'y'

        if should_convert and not check_ffmpeg():
            print("Error: ffmpeg not found. Please install ffmpeg to convert files.", file=sys.stderr)
            should_convert = False

        if should_convert:
            print("\nConverting files...")

            for filename, mapping in mp4_files.items():
                input_path = mapping['local_path']
                # Output goes next to the input, named after the XML
                # filename with a .mov extension.
                output_filename = f"{os.path.splitext(filename)[0]}.mov"
                output_path = os.path.join(os.path.dirname(input_path), output_filename)

                if convert_mp4_to_mov(input_path, output_path):
                    # Point the mapping at the converted file so the XML
                    # update below references the MOV.
                    file_mapping[filename]['local_path'] = output_path
                    file_mapping[filename]['needs_conversion'] = False
                    converted_count += 1

            if converted_count:
                print(f"\nSuccessfully converted {converted_count} file(s)")
    else:
        print("No MP4 files found that need conversion.")

    print()

    # Prompt for XML updates
    response = input("Update XML file paths? (y/n): ")
    should_update = response.strip().lower() == 'y'

    updated_xml_count = 0
    if should_update:
        print("\nUpdating XML files...")
        for xml_file in xml_files:
            # Only update paths for files referenced in this XML
            referenced_names = {name for name, _ in xml_to_refs[xml_file]}
            xml_specific_mapping = {
                filename: mapping
                for filename, mapping in file_mapping.items()
                if filename in referenced_names
            }

            if not xml_specific_mapping:
                print(f" No matching files found for {xml_file}")
                continue

            # Count only files whose update actually succeeded so the final
            # report does not overstate what was changed.
            if update_xml_paths(xml_file, xml_specific_mapping):
                updated_xml_count += 1

    # Final report
    print("\n" + "="*60)
    print("Final Report")
    print("="*60)

    if missing_files:
        print(f"\nMissing files ({len(missing_files)}):")
        for filename in sorted(missing_files):
            print(f" - {filename}")
    else:
        print("\nAll referenced files were found!")

    if should_convert and mp4_files:
        print(f"\nConverted {converted_count} MP4 file(s) to MOV")

    if should_update:
        print(f"\nUpdated {updated_xml_count} XML file(s)")
|
||
|
||
|
||
# Script entry point: run the interactive workflow only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()
|