Files
webref/backend/app/images/validation.py
Danilo Reyes 010df31455 phase 5
2025-11-02 11:07:42 -06:00

111 lines
2.9 KiB
Python

"""File validation utilities for image uploads."""
import magic
from fastapi import HTTPException, UploadFile, status
# Maximum file size: 50MB
MAX_FILE_SIZE = 52_428_800
# Allowed MIME types
ALLOWED_MIME_TYPES = {
"image/jpeg",
"image/jpg",
"image/png",
"image/gif",
"image/webp",
"image/svg+xml",
}
# Allowed file extensions
ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"}
async def validate_image_file(file: UploadFile) -> bytes:
"""
Validate uploaded image file.
Checks:
- File size within limits
- MIME type allowed
- Magic bytes match declared type
- File extension valid
Args:
file: The uploaded file from FastAPI
Returns:
File contents as bytes
Raises:
HTTPException: If validation fails
"""
# Read file contents
contents = await file.read()
file_size = len(contents)
# Reset file pointer for potential re-reading
await file.seek(0)
# Check file size
if file_size == 0:
raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Empty file uploaded")
if file_size > MAX_FILE_SIZE:
raise HTTPException(
status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
detail=f"File too large. Maximum size is {MAX_FILE_SIZE / 1_048_576:.1f}MB",
)
# Validate file extension
if file.filename:
extension = "." + file.filename.lower().split(".")[-1] if "." in file.filename else ""
if extension not in ALLOWED_EXTENSIONS:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid file extension. Allowed: {', '.join(ALLOWED_EXTENSIONS)}",
)
# Detect actual MIME type using magic bytes
mime = magic.from_buffer(contents, mime=True)
# Validate MIME type
if mime not in ALLOWED_MIME_TYPES:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid file type '{mime}'. Allowed types: {', '.join(ALLOWED_MIME_TYPES)}",
)
return contents
def sanitize_filename(filename: str) -> str:
"""
Sanitize filename to prevent path traversal and other attacks.
Args:
filename: Original filename
Returns:
Sanitized filename
"""
import re
# Remove path separators
filename = filename.replace("/", "_").replace("\\", "_")
# Remove any non-alphanumeric characters except dots, dashes, underscores
filename = re.sub(r"[^a-zA-Z0-9._-]", "_", filename)
# Limit length
max_length = 255
if len(filename) > max_length:
# Keep extension
parts = filename.rsplit(".", 1)
if len(parts) == 2:
name, ext = parts
filename = name[: max_length - len(ext) - 1] + "." + ext
else:
filename = filename[:max_length]
return filename