# Source code for agentopera.chatflow.media.pil_image

from __future__ import annotations

import base64
import re
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, cast

from PIL import Image as PILImage
from pydantic import GetCoreSchemaHandler, ValidationInfo
from pydantic_core import core_schema
from typing_extensions import Literal

from agentopera.engine.types import Image


class PilImage(Image):
    """Image class based on PIL (Pillow).

    This class provides functions to create, convert and operate on images,
    and can be used to exchange image data in the message system. The wrapped
    PIL image is converted to RGB mode on construction and stored in
    ``self.image``.

    Example:

        Loading an image from a URL:

        .. code-block:: python

            import asyncio
            from io import BytesIO

            import aiohttp
            from PIL import Image as PILImage

            from agentopera.chatflow.media.pil_image import PilImage


            async def from_url(url: str) -> PilImage:
                async with aiohttp.ClientSession() as session:
                    async with session.get(url) as response:
                        content = await response.read()
                        return PilImage.from_pil(PILImage.open(BytesIO(content)))


            image = asyncio.run(from_url("https://example.com/image"))

    """

    def __init__(self, image: PILImage.Image):
        """Wrap ``image``, storing an RGB-converted copy.

        Args:
            image: The PIL image to wrap.
        """
        # NOTE(review): the base-class ``__init__`` is not invoked here;
        # confirm that ``Image`` requires no initialisation of its own.
        self.image: PILImage.Image = image.convert("RGB")

    @classmethod
    def from_pil(cls, pil_image: PILImage.Image) -> Image:
        """Create an image object from a PIL image.

        Args:
            pil_image: The PIL image to wrap.

        Returns:
            A new instance of this class holding the RGB-converted image.
        """
        return cls(pil_image)

    @classmethod
    def from_uri(cls, uri: str) -> Image:
        """Create an image object from a base64 ``data:`` image URI.

        Args:
            uri: A URI starting with ``data:image/png;base64,`` or
                ``data:image/jpeg;base64,`` followed by the base64 payload.

        Returns:
            A new instance of this class decoded from the payload.

        Raises:
            ValueError: If ``uri`` does not start with one of the accepted
                prefixes.
        """
        prefix = re.match(r"data:image/(?:png|jpeg);base64,", uri)
        if prefix is None:
            raise ValueError("Invalid URI format. It should be a base64 encoded image URI.")

        # Everything after the matched prefix is the base64 payload.
        return cls.from_base64(uri[prefix.end():])

    @classmethod
    def from_base64(cls, base64_str: str) -> Image:
        """Create an image object from a base64 encoded string.

        Args:
            base64_str: Base64 text of the encoded image file bytes.

        Returns:
            A new instance of this class decoded from ``base64_str``.
        """
        return cls(PILImage.open(BytesIO(base64.b64decode(base64_str))))

    def to_base64(self) -> str:
        """Convert the image to a base64 encoded string.

        The image is always serialised as PNG before encoding.

        Returns:
            The base64 text (UTF-8 decoded) of the PNG bytes.
        """
        buffered = BytesIO()
        self.image.save(buffered, format="PNG")
        content = buffered.getvalue()
        return base64.b64encode(content).decode("utf-8")

    @classmethod
    def from_file(cls, file_path: Path) -> Image:
        """Create an image object from a file path.

        Args:
            file_path: Location of the image file to load.

        Returns:
            A new instance of this class holding the loaded image.
        """
        # ``PILImage.open`` is lazy and keeps the file handle open. The
        # constructor stores a converted copy, so the source image (and its
        # file handle) can be closed as soon as construction finishes.
        with PILImage.open(file_path) as source:
            return cls(source)

    def _repr_html_(self) -> str:
        """Return an ``<img>`` tag so the image displays in a Jupyter notebook."""
        return f'<img src="{self.data_uri}"/>'

    @property
    def data_uri(self) -> str:
        """Return the image as a base64 ``data:`` URI."""
        return _convert_base64_to_data_uri(self.to_base64())

    def to_openai_format(self, detail: Literal["auto", "low", "high"] = "auto") -> Dict[str, Any]:
        """Convert the image to an OpenAI API compatible format.

        Args:
            detail: Detail level of the image; one of "auto", "low" or "high".

        Returns:
            A dict with ``"type": "image_url"`` and an ``"image_url"`` entry
            containing the data URI (``"url"``) and the detail level
            (``"detail"``).
        """
        return {"type": "image_url", "image_url": {"url": self.data_uri, "detail": detail}}



def _convert_base64_to_data_uri(base64_image: str) -> str:
    """Build a ``data:`` URI for a base64 encoded image.

    The MIME type is sniffed from the leading bytes of the decoded payload:
    PNG, GIF and WebP signatures are recognised explicitly, and everything
    else (JPEG as well as unrecognised data) is labelled ``image/jpeg``.

    Args:
        base64_image: Base64 text of the image bytes.

    Returns:
        ``data:<mime>;base64,<base64_image>`` with the input text embedded
        unchanged.
    """
    payload = base64.b64decode(base64_image)

    # The signatures are mutually exclusive, so the order of checks is free.
    if payload.startswith(b"\x89PNG\r\n\x1a\n"):
        detected = "image/png"
    elif payload.startswith((b"GIF87a", b"GIF89a")):
        detected = "image/gif"
    elif payload[:4] == b"RIFF" and payload[8:12] == b"WEBP":
        detected = "image/webp"
    else:
        # Covers the JPEG signature (FF D8 FF) and, as a best guess, any
        # unknown format.
        detected = "image/jpeg"

    return f"data:{detected};base64,{base64_image}"