"""ZIP file extraction handler for batch image uploads.""" import io import zipfile from collections.abc import AsyncIterator from fastapi import HTTPException, UploadFile, status async def extract_images_from_zip(zip_file: UploadFile) -> AsyncIterator[tuple[str, bytes]]: """ Extract image files from ZIP archive. Args: zip_file: Uploaded ZIP file Yields: Tuples of (filename, contents) for each image file Raises: HTTPException: If ZIP is invalid or too large """ # Read ZIP contents zip_contents = await zip_file.read() # Check ZIP size (max 200MB for ZIP) max_zip_size = 200 * 1024 * 1024 # 200MB if len(zip_contents) > max_zip_size: raise HTTPException( status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE, detail=f"ZIP file too large. Maximum size is {max_zip_size / 1_048_576:.1f}MB", ) try: # Open ZIP file with zipfile.ZipFile(io.BytesIO(zip_contents)) as zip_ref: # Get list of image files (filter by extension) image_extensions = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"} image_files = [ name for name in zip_ref.namelist() if not name.startswith("__MACOSX/") # Skip macOS metadata and not name.startswith(".") # Skip hidden files and any(name.lower().endswith(ext) for ext in image_extensions) ] if not image_files: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, detail="No valid image files found in ZIP archive", ) # Extract each image for filename in image_files: # Skip directories if filename.endswith("/"): continue # Get just the filename without path base_filename = filename.split("/")[-1] # Read file contents file_contents = zip_ref.read(filename) yield base_filename, file_contents except zipfile.BadZipFile as e: raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail="Invalid ZIP file") from e except Exception as e: raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=f"Error processing ZIP file: {str(e)}", ) from e