"""File validation utilities for image uploads.""" import magic from fastapi import HTTPException, UploadFile, status # Maximum file size: 50MB MAX_FILE_SIZE = 52_428_800 # Allowed MIME types ALLOWED_MIME_TYPES = { "image/jpeg", "image/jpg", "image/png", "image/gif", "image/webp", "image/svg+xml", } # Allowed file extensions ALLOWED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"} async def validate_image_file(file: UploadFile) -> bytes: """ Validate uploaded image file. Checks: - File size within limits - MIME type allowed - Magic bytes match declared type - File extension valid Args: file: The uploaded file from FastAPI Returns: File contents as bytes Raises: HTTPException: If validation fails """ # Read file contents contents = await file.read() file_size = len(contents) # Reset file pointer for potential re-reading await file.seek(0) # Check file size if file_size == 0: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Empty file uploaded") if file_size > MAX_FILE_SIZE: raise HTTPException( status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, detail=f"File too large. Maximum size is {MAX_FILE_SIZE / 1_048_576:.1f}MB", ) # Validate file extension if file.filename: extension = "." + file.filename.lower().split(".")[-1] if "." in file.filename else "" if extension not in ALLOWED_EXTENSIONS: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid file extension. Allowed: {', '.join(ALLOWED_EXTENSIONS)}", ) # Detect actual MIME type using magic bytes mime = magic.from_buffer(contents, mime=True) # Validate MIME type if mime not in ALLOWED_MIME_TYPES: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail=f"Invalid file type '{mime}'. Allowed types: {', '.join(ALLOWED_MIME_TYPES)}", ) return contents def sanitize_filename(filename: str) -> str: """ Sanitize filename to prevent path traversal and other attacks. Args: filename: Original filename Returns: Sanitized filename """ import re # Remove path separators filename = filename.replace("/", "_").replace("\\", "_") # Remove any non-alphanumeric characters except dots, dashes, underscores filename = re.sub(r"[^a-zA-Z0-9._-]", "_", filename) # Limit length max_length = 255 if len(filename) > max_length: # Keep extension parts = filename.rsplit(".", 1) if len(parts) == 2: name, ext = parts filename = name[: max_length - len(ext) - 1] + "." + ext else: filename = filename[:max_length] return filename