kanban-app/backend/app/services/storage/file_processor.py

"""File processor classes for different file types"""

from abc import ABC, abstractmethod
from io import BytesIO
from typing import Optional, Tuple

from flask import current_app
from PIL import Image


class FileProcessor(ABC):
    """Common interface implemented by every concrete file processor"""

    @abstractmethod
    def validate(self, file_data: BytesIO, file_size: int, mime_type: str) -> bool:
        """
        Decide whether an uploaded file is acceptable

        Args:
            file_data: In-memory buffer holding the file contents
            file_size: Size of the file in bytes
            mime_type: MIME type reported for the file

        Returns:
            bool: True when the file passes validation
        """

    @abstractmethod
    def process(self, file_data: BytesIO, mime_type: str) -> dict:
        """
        Process the file and report what was learned about it

        Args:
            file_data: In-memory buffer holding the file contents
            mime_type: MIME type reported for the file

        Returns:
            dict: Processing results and metadata
        """

    @abstractmethod
    def get_file_type(self) -> str:
        """Return the file type category (image, pdf, document, etc.)"""


class ImageProcessor(FileProcessor):
    """Processor for image files

    Validates uploads against the size and MIME-type limits stored in the
    Flask app config, confirms with Pillow that the bytes really are an
    image, and produces basic metadata plus a JPEG thumbnail.
    """

    def __init__(self):
        # MIME type -> short format name.
        # NOTE(review): nothing in this module reads this mapping; validation
        # relies on current_app.config["ALLOWED_IMAGE_TYPES"] instead.
        # NOTE(review): Pillow is not expected to decode SVG, so an
        # "image/svg+xml" upload would presumably fail the Image.open() check
        # in validate() - confirm whether SVG is meant to be supported.
        self.supported_formats = {
            "image/jpeg": "jpeg",
            "image/png": "png",
            "image/gif": "gif",
            "image/webp": "webp",
            "image/svg+xml": "svg+xml",
        }

    def validate(self, file_data: BytesIO, file_size: int, mime_type: str) -> bool:
        """
        Validate image file

        Checks, in order: the size limit, the MIME allow-list, and whether
        Pillow can open and verify the data. Every rejection is logged.

        Args:
            file_data: File data as BytesIO
            file_size: Size of the file in bytes
            mime_type: MIME type of the file

        Returns:
            bool: True if valid, False otherwise
        """
        # Check file size
        max_size = current_app.config["MAX_UPLOAD_SIZE_IMAGE"]
        if file_size > max_size:
            # The configured limit is interpolated here; previously the second
            # string fragment lacked the f-prefix and logged the raw template.
            current_app.logger.error(
                f"Image size {file_size} exceeds maximum {max_size}"
            )
            return False

        # Check MIME type
        if mime_type not in current_app.config["ALLOWED_IMAGE_TYPES"]:
            current_app.logger.error(f"Unsupported image MIME type: {mime_type}")
            return False

        # Verify it's actually an image
        try:
            file_data.seek(0)
            img = Image.open(file_data)
            # verify() leaves the Image object unusable for further work,
            # which is why process() opens the data again.
            img.verify()
            # Rewind so the caller can read the buffer from the start
            file_data.seek(0)
            return True
        except Exception as e:
            current_app.logger.error(f"Image validation failed: {e}")
            return False

    def process(self, file_data: BytesIO, mime_type: str) -> dict:
        """
        Process image and extract metadata

        Args:
            file_data: File data as BytesIO
            mime_type: MIME type of the file (not used by this processor)

        Returns:
            dict: "metadata" (width, height, format, mode, file_type) and
            "thumbnail_data" (JPEG bytes as BytesIO, or None if thumbnail
            generation failed)

        Exceptions raised by Image.open() are not caught here and propagate
        to the caller.
        """
        file_data.seek(0)
        img = Image.open(file_data)

        metadata = {
            "width": img.width,
            "height": img.height,
            "format": img.format,
            "mode": img.mode,
            "file_type": "image",
        }

        # Generate thumbnail
        thumbnail_data = self._generate_thumbnail(img)

        # Rewind so the caller can read the original data from the start
        file_data.seek(0)
        return {
            "metadata": metadata,
            "thumbnail_data": thumbnail_data,
        }

    def _generate_thumbnail(
        self, img: Image.Image, size: Tuple[int, int] = (200, 200)
    ) -> Optional[BytesIO]:
        """
        Generate thumbnail for image

        The thumbnail is always encoded as JPEG; images with an alpha channel
        or a palette are flattened onto a white background first.

        Args:
            img: PIL Image object
            size: Thumbnail size (width, height)

        Returns:
            BytesIO: Thumbnail data positioned at offset 0, or None if any
            step failed (the failure is logged)
        """
        try:
            # Work on a copy so the caller's image is left untouched
            img_copy = img.copy()
            img_copy.thumbnail(size, Image.Resampling.LANCZOS)

            # Convert RGBA to RGB if necessary (JPEG doesn't support transparency)
            if img_copy.mode in ("RGBA", "LA", "P"):
                if img_copy.mode == "P":
                    img_copy = img_copy.convert("RGBA")
                # From here the mode is RGBA or LA, so the last band is alpha;
                # use it as the paste mask over a white background.
                background = Image.new("RGB", img_copy.size, (255, 255, 255))
                background.paste(img_copy, mask=img_copy.split()[-1])
                img_copy = background
            elif img_copy.mode != "RGB":
                img_copy = img_copy.convert("RGB")

            # Save to BytesIO
            thumb_data = BytesIO()
            img_copy.save(thumb_data, format="JPEG", quality=85)
            thumb_data.seek(0)

            return thumb_data
        except Exception as e:
            current_app.logger.error(f"Thumbnail generation failed: {e}")
            return None

    def get_file_type(self) -> str:
        """Return the file type category handled by this processor"""
        return "image"


class PDFProcessor(FileProcessor):
    """Handles uploads that are PDF documents"""

    def validate(self, file_data: BytesIO, file_size: int, mime_type: str) -> bool:
        """
        Check a PDF upload against size, MIME type and file signature

        Args:
            file_data: File data as BytesIO
            file_size: Size of the file in bytes
            mime_type: MIME type of the file

        Returns:
            bool: True if every check passes; each failure is logged
        """
        # Size limit shared by all document uploads
        size_limit = current_app.config["MAX_UPLOAD_SIZE_DOCUMENT"]
        if file_size > size_limit:
            current_app.logger.error(
                f"PDF size {file_size} exceeds maximum {size_limit}"
            )
            return False

        # MIME allow-list
        allowed_types = current_app.config["ALLOWED_DOCUMENT_TYPES"]
        if mime_type not in allowed_types:
            current_app.logger.error(f"Unsupported document MIME type: {mime_type}")
            return False

        # The first four bytes must be the PDF signature
        try:
            file_data.seek(0)
            magic = file_data.read(4)
            # Rewind so the caller can read the buffer from the start
            file_data.seek(0)
            if magic == b"%PDF":
                return True
            current_app.logger.error("Invalid PDF header")
            return False
        except Exception as exc:
            current_app.logger.error(f"PDF validation failed: {exc}")
            return False

    def process(self, file_data: BytesIO, mime_type: str) -> dict:
        """
        Rewind the buffer and return minimal PDF metadata

        Args:
            file_data: File data as BytesIO
            mime_type: MIME type of the file (not used by this processor)

        Returns:
            dict: "metadata" holding only the file type, and
            "thumbnail_data" which is always None
        """
        file_data.seek(0)

        # Only the file type is reported here.
        # Note: For more detailed extraction, you might want to use PyPDF2 or similar
        pdf_metadata = {"file_type": "pdf"}

        # No thumbnail is generated for PDFs
        return {"metadata": pdf_metadata, "thumbnail_data": None}

    def get_file_type(self) -> str:
        """Return the file type category handled by this processor"""
        return "pdf"


class DocumentProcessor(FileProcessor):
    """Fallback processor for non-image, non-PDF uploads (placeholder for future expansion)"""

    def validate(self, file_data: BytesIO, file_size: int, mime_type: str) -> bool:
        """
        Accept any document that fits within the configured size limit

        Args:
            file_data: File data as BytesIO (not inspected)
            file_size: Size of the file in bytes
            mime_type: MIME type of the file (not inspected)

        Returns:
            bool: False when the file is over the limit, True otherwise
        """
        # NOTE(review): unlike PDFProcessor, the MIME type is not checked
        # against an allow-list here - confirm that is intentional.
        size_cap = current_app.config["MAX_UPLOAD_SIZE_DOCUMENT"]
        oversized = file_size > size_cap
        if oversized:
            current_app.logger.error("Document size exceeds maximum")
            return False
        return True

    def process(self, file_data: BytesIO, mime_type: str) -> dict:
        """
        Return the fixed metadata for a generic document

        Args:
            file_data: File data as BytesIO (not read)
            mime_type: MIME type of the file (not used)

        Returns:
            dict: "metadata" holding only the file type, and
            "thumbnail_data" which is always None
        """
        document_metadata = {"file_type": "document"}
        return {"metadata": document_metadata, "thumbnail_data": None}

    def get_file_type(self) -> str:
        """Return the file type category handled by this processor"""
        return "document"


class ProcessorFactory:
    """Maps MIME types to file processors and file type categories"""

    @staticmethod
    def get_processor(mime_type: str) -> FileProcessor:
        """
        Build the processor responsible for a MIME type

        Args:
            mime_type: MIME type of the file

        Returns:
            FileProcessor: ImageProcessor for any "image/" type, PDFProcessor
            for "application/pdf", DocumentProcessor for everything else
        """
        if mime_type.startswith("image/"):
            return ImageProcessor()
        if mime_type == "application/pdf":
            return PDFProcessor()
        # Anything unrecognised is treated as a generic document
        return DocumentProcessor()

    @staticmethod
    def get_file_type_from_mime(mime_type: str) -> str:
        """
        Classify a MIME type into a file type category

        Args:
            mime_type: MIME type of the file

        Returns:
            str: "image", "pdf" or "document"
        """
        if mime_type.startswith("image/"):
            return "image"
        return "pdf" if mime_type == "application/pdf" else "document"
add upload logic to app 2026-03-20 17:17:01 +00:00			`"""File processor classes for different file types"""`

			`from abc import ABC, abstractmethod`
			`from io import BytesIO`
			`from typing import Optional, Tuple`

			`from flask import current_app`
			`from PIL import Image`


			`class FileProcessor(ABC):`
			`"""Abstract base class for file processors"""`

			`@abstractmethod`
			`def validate(self, file_data: BytesIO, file_size: int, mime_type: str) -> bool:`
			`"""`
			`Validate file`

			`Args:`
			`file_data: File data as BytesIO`
			`file_size: Size of the file in bytes`
			`mime_type: MIME type of the file`

			`Returns:`
			`bool: True if valid`
			`"""`
			`pass`

			`@abstractmethod`
			`def process(self, file_data: BytesIO, mime_type: str) -> dict:`
			`"""`
			`Process file and return metadata`

			`Args:`
			`file_data: File data as BytesIO`
			`mime_type: MIME type of the file`

			`Returns:`
			`dict: Processing results and metadata`
			`"""`
			`pass`

			`@abstractmethod`
			`def get_file_type(self) -> str:`
			`"""Return the file type (image, pdf, document, etc.)"""`
			`pass`


			`class ImageProcessor(FileProcessor):`
			`"""Processor for image files"""`

			`def __init__(self):`
			`self.supported_formats = {`
			`"image/jpeg": "jpeg",`
			`"image/png": "png",`
			`"image/gif": "gif",`
			`"image/webp": "webp",`
			`"image/svg+xml": "svg+xml",`
			`}`

			`def validate(self, file_data: BytesIO, file_size: int, mime_type: str) -> bool:`
			`"""Validate image file"""`
			`# Check file size`
			`if file_size > current_app.config["MAX_UPLOAD_SIZE_IMAGE"]:`
			`current_app.logger.error(`
			`f"Image size {file_size} exceeds "`
			`+ "maximum {current_app.config['MAX_UPLOAD_SIZE_IMAGE']}"`
			`)`
			`return False`

			`# Check MIME type`
			`if mime_type not in current_app.config["ALLOWED_IMAGE_TYPES"]:`
			`current_app.logger.error(f"Unsupported image MIME type: {mime_type}")`
			`return False`

			`# Verify it's actually an image`
			`try:`
			`file_data.seek(0)`
			`img = Image.open(file_data)`
			`img.verify()`
			`file_data.seek(0)`
			`return True`
			`except Exception as e:`
			`current_app.logger.error(f"Image validation failed: {e}")`
			`return False`

			`def process(self, file_data: BytesIO, mime_type: str) -> dict:`
			`"""Process image and extract metadata"""`
			`file_data.seek(0)`
			`img = Image.open(file_data)`

			`metadata = {`
			`"width": img.width,`
			`"height": img.height,`
			`"format": img.format,`
			`"mode": img.mode,`
			`"file_type": "image",`
			`}`

			`# Generate thumbnail`
			`thumbnail_data = self._generate_thumbnail(img)`

			`file_data.seek(0)`
			`return {`
			`"metadata": metadata,`
			`"thumbnail_data": thumbnail_data,`
			`}`

			`def _generate_thumbnail(`
			`self, img: Image.Image, size: Tuple[int, int] = (200, 200)`
			`) -> Optional[BytesIO]:`
			`"""`
			`Generate thumbnail for image`

			`Args:`
			`img: PIL Image object`
			`size: Thumbnail size (width, height)`

			`Returns:`
			`BytesIO: Thumbnail data or None`
			`"""`
			`try:`
			`# Create thumbnail`
			`img_copy = img.copy()`
			`img_copy.thumbnail(size, Image.Resampling.LANCZOS)`

			`# Convert RGBA to RGB if necessary (JPEG doesn't support transparency)`
			`if img_copy.mode in ("RGBA", "LA", "P"):`
			`# Create white background`
			`background = Image.new("RGB", img_copy.size, (255, 255, 255))`
			`if img_copy.mode == "P":`
			`img_copy = img_copy.convert("RGBA")`
			`if img_copy.mode in ("RGBA", "LA"):`
			`background.paste(img_copy, mask=img_copy.split()[-1])`
			`else:`
			`background.paste(img_copy)`
			`img_copy = background`
			`elif img_copy.mode != "RGB":`
			`img_copy = img_copy.convert("RGB")`

			`# Save to BytesIO`
			`thumb_data = BytesIO()`
			`img_copy.save(thumb_data, format="JPEG", quality=85)`
			`thumb_data.seek(0)`

			`return thumb_data`
			`except Exception as e:`
			`current_app.logger.error(f"Thumbnail generation failed: {e}")`
			`return None`

			`def get_file_type(self) -> str:`
			`return "image"`


			`class PDFProcessor(FileProcessor):`
			`"""Processor for PDF files"""`

			`def validate(self, file_data: BytesIO, file_size: int, mime_type: str) -> bool:`
			`"""Validate PDF file"""`
			`# Check file size`
			`if file_size > current_app.config["MAX_UPLOAD_SIZE_DOCUMENT"]:`
			`current_app.logger.error(`
			`f"PDF size {file_size} exceeds "`
			`f"maximum {current_app.config['MAX_UPLOAD_SIZE_DOCUMENT']}"`
			`)`
			`return False`

			`# Check MIME type`
			`if mime_type not in current_app.config["ALLOWED_DOCUMENT_TYPES"]:`
			`current_app.logger.error(f"Unsupported document MIME type: {mime_type}")`
			`return False`

			`# Verify it's actually a PDF`
			`try:`
			`file_data.seek(0)`
			`header = file_data.read(4)`
			`file_data.seek(0)`
			`if header != b"%PDF":`
			`current_app.logger.error("Invalid PDF header")`
			`return False`
			`return True`
			`except Exception as e:`
			`current_app.logger.error(f"PDF validation failed: {e}")`
			`return False`

			`def process(self, file_data: BytesIO, mime_type: str) -> dict:`
			`"""Process PDF and extract metadata"""`
			`file_data.seek(0)`

			`# Basic PDF metadata extraction`
			`# Note: For more detailed extraction, you might want to use PyPDF2 or similar`
			`metadata = {`
			`"file_type": "pdf",`
			`}`

			`return {`
			`"metadata": metadata,`
			`"thumbnail_data": None, # PDFs don't have thumbnails by default`
			`}`

			`def get_file_type(self) -> str:`
			`return "pdf"`


			`class DocumentProcessor(FileProcessor):`
			`"""Processor for other document types (placeholder for future expansion)"""`

			`def validate(self, file_data: BytesIO, file_size: int, mime_type: str) -> bool:`
			`"""Validate document file"""`
			`# Basic validation - can be extended for specific document types`
			`if file_size > current_app.config["MAX_UPLOAD_SIZE_DOCUMENT"]:`
			`current_app.logger.error("Document size exceeds maximum")`
			`return False`
			`return True`

			`def process(self, file_data: BytesIO, mime_type: str) -> dict:`
			`"""Process document"""`
			`return {`
			`"metadata": {"file_type": "document"},`
			`"thumbnail_data": None,`
			`}`

			`def get_file_type(self) -> str:`
			`return "document"`


			`class ProcessorFactory:`
			`"""Factory for creating appropriate file processors"""`

			`@staticmethod`
			`def get_processor(mime_type: str) -> FileProcessor:`
			`"""`
			`Get appropriate processor for MIME type`

			`Args:`
			`mime_type: MIME type of the file`

			`Returns:`
			`FileProcessor: Appropriate processor instance`
			`"""`
			`if mime_type.startswith("image/"):`
			`return ImageProcessor()`
			`elif mime_type == "application/pdf":`
			`return PDFProcessor()`
			`else:`
			`return DocumentProcessor()`

			`@staticmethod`
			`def get_file_type_from_mime(mime_type: str) -> str:`
			`"""`
			`Get file type category from MIME type`

			`Args:`
			`mime_type: MIME type of the file`

			`Returns:`
			`str: File type category`
			`"""`
			`if mime_type.startswith("image/"):`
			`return "image"`
			`elif mime_type == "application/pdf":`
			`return "pdf"`
			`else:`
			`return "document"`