# Source code for aiohttp.compression_utils

import asyncio
import sys
import zlib
from abc import ABC, abstractmethod
from concurrent.futures import Executor
from typing import Any, Final, Protocol, TypedDict, cast

if sys.version_info >= (3, 12):
    from collections.abc import Buffer
else:
    from typing import Union

    Buffer = Union[bytes, bytearray, "memoryview[int]", "memoryview[bytes]"]

try:
    try:
        import brotlicffi as brotli
    except ImportError:
        import brotli

    HAS_BROTLI = True
except ImportError:  # pragma: no cover
    HAS_BROTLI = False

try:
    if sys.version_info >= (3, 14):
        from compression.zstd import ZstdDecompressor  # noqa: I900
    else:  # TODO(PY314): Remove mentions of backports.zstd across codebase
        from backports.zstd import ZstdDecompressor

    HAS_ZSTD = True
except ImportError:
    HAS_ZSTD = False


# Default size threshold, in bytes: the async compress()/decompress() helpers
# in this module hand payloads longer than this to an executor and process
# shorter ones inline.
MAX_SYNC_CHUNK_SIZE = 4096

# Unlimited decompression constants - different libraries use different conventions
ZLIB_MAX_LENGTH_UNLIMITED = 0  # zlib uses 0 to mean unlimited
ZSTD_MAX_LENGTH_UNLIMITED = -1  # zstd uses -1 to mean unlimited


class ZLibCompressObjProtocol(Protocol):
    """Structural type of the streaming compressor object a backend returns.

    This is the return type declared for ``compressobj()`` on the backend
    protocol and wrapper in this module.
    """

    # Feed ``data`` to the compressor; returns bytes.
    def compress(self, data: Buffer) -> bytes: ...
    # Flush the compressor; ``mode`` is an optional, positional-only integer.
    def flush(self, mode: int = ..., /) -> bytes: ...


class ZLibDecompressObjProtocol(Protocol):
    """Structural type of the streaming decompressor object a backend returns.

    This is the return type declared for ``decompressobj()`` on the backend
    protocol and wrapper in this module.
    """

    # Decompress ``data``; ``max_length`` optionally bounds the output size
    # (this module passes 0 to mean "unlimited").
    def decompress(self, data: Buffer, max_length: int = ...) -> bytes: ...
    # Flush the decompressor; ``length`` is an optional, positional-only integer.
    def flush(self, length: int = ..., /) -> bytes: ...

    # Checked by ZLibDecompressor to detect that the current member has ended.
    @property
    def eof(self) -> bool: ...

    # Prepended by ZLibDecompressor to the input of its next decompress() call.
    @property
    def unconsumed_tail(self) -> bytes: ...

    # Bytes found after the end of the current member; ZLibDecompressor feeds
    # them to a fresh decompressor as the start of the next member.
    @property
    def unused_data(self) -> bytes: ...


class ZLibBackendProtocol(Protocol):
    """Structural type of a zlib-compatible backend module.

    Anything exposing these constants and functions can be installed with
    ``set_zlib_backend()``; the standard-library ``zlib`` module is the
    default backend used by this file.
    """

    # zlib-style integer constants the backend must expose.  In this module,
    # MAX_WBITS is used to derive the ``wbits`` mode and Z_FINISH is the
    # default flush mode of ZLibCompressor.flush().
    MAX_WBITS: int
    Z_FULL_FLUSH: int
    Z_SYNC_FLUSH: int
    Z_BEST_SPEED: int
    Z_FINISH: int

    # Factory for a streaming compressor object.
    def compressobj(
        self,
        level: int = ...,
        method: int = ...,
        wbits: int = ...,
        memLevel: int = ...,
        strategy: int = ...,
        zdict: Buffer | None = ...,
    ) -> ZLibCompressObjProtocol: ...
    # Factory for a streaming decompressor object.
    def decompressobj(
        self, wbits: int = ..., zdict: Buffer = ...
    ) -> ZLibDecompressObjProtocol: ...

    # One-shot compression of a complete buffer.
    def compress(
        self, data: Buffer, /, level: int = ..., wbits: int = ...
    ) -> bytes: ...
    # One-shot decompression of a complete buffer.
    def decompress(
        self, data: Buffer, /, wbits: int = ..., bufsize: int = ...
    ) -> bytes: ...


class CompressObjArgs(TypedDict, total=False):
    """Keyword arguments that ZLibCompressor passes to ``compressobj()``.

    ``total=False`` makes every key optional, so only the options that were
    actually supplied need to be present.
    """

    wbits: int
    strategy: int
    level: int


class ZLibBackendWrapper:
    """Indirection layer around a zlib-compatible backend module.

    The members listed in ``ZLibBackendProtocol`` are exposed explicitly;
    any other attribute is forwarded to the wrapped backend unchanged.
    Because the backend is held in ``_zlib_backend``, it can be replaced
    after the wrapper has been created.
    """

    def __init__(self, _zlib_backend: ZLibBackendProtocol):
        """Wrap ``_zlib_backend``, the module every call is delegated to."""
        self._zlib_backend: ZLibBackendProtocol = _zlib_backend

    @property
    def name(self) -> str:
        """Return the backend's ``__name__``, or ``"undefined"`` if it has none."""
        return getattr(self._zlib_backend, "__name__", "undefined")

    @property
    def MAX_WBITS(self) -> int:
        """Return the backend's ``MAX_WBITS`` constant."""
        return self._zlib_backend.MAX_WBITS

    @property
    def Z_FULL_FLUSH(self) -> int:
        """Return the backend's ``Z_FULL_FLUSH`` constant."""
        return self._zlib_backend.Z_FULL_FLUSH

    @property
    def Z_SYNC_FLUSH(self) -> int:
        """Return the backend's ``Z_SYNC_FLUSH`` constant."""
        return self._zlib_backend.Z_SYNC_FLUSH

    @property
    def Z_BEST_SPEED(self) -> int:
        """Return the backend's ``Z_BEST_SPEED`` constant."""
        return self._zlib_backend.Z_BEST_SPEED

    @property
    def Z_FINISH(self) -> int:
        """Return the backend's ``Z_FINISH`` constant."""
        return self._zlib_backend.Z_FINISH

    def compressobj(self, *args: Any, **kwargs: Any) -> ZLibCompressObjProtocol:
        """Create a streaming compressor via the backend's ``compressobj``."""
        return self._zlib_backend.compressobj(*args, **kwargs)

    def decompressobj(self, *args: Any, **kwargs: Any) -> ZLibDecompressObjProtocol:
        """Create a streaming decompressor via the backend's ``decompressobj``."""
        return self._zlib_backend.decompressobj(*args, **kwargs)

    def compress(self, data: Buffer, *args: Any, **kwargs: Any) -> bytes:
        """Compress ``data`` in one shot via the backend's ``compress``."""
        return self._zlib_backend.compress(data, *args, **kwargs)

    def decompress(self, data: Buffer, *args: Any, **kwargs: Any) -> bytes:
        """Decompress ``data`` in one shot via the backend's ``decompress``."""
        return self._zlib_backend.decompress(data, *args, **kwargs)

    # Everything not explicitly listed in the Protocol we just pass through
    def __getattr__(self, attrname: str) -> Any:
        """Forward unknown attributes to the wrapped backend.

        Raises:
            AttributeError: if the backend lacks ``attrname``, or if the
                wrapper itself has no backend yet.
        """
        # __getattr__ only runs after normal lookup has failed.  If the
        # missing name is ``_zlib_backend`` itself, the instance was never
        # initialised (e.g. it was created through ``__new__`` by
        # ``copy.copy``).  Reading ``self._zlib_backend`` below would then
        # re-enter this method until RecursionError, so fail normally.
        if attrname == "_zlib_backend":
            raise AttributeError(attrname)
        return getattr(self._zlib_backend, attrname)


# Module-wide wrapper around the active zlib-compatible backend.  It starts
# out delegating to the standard-library ``zlib`` module.
ZLibBackend: ZLibBackendWrapper = ZLibBackendWrapper(zlib)



# [docs] -- appears to be a documentation-link marker left over from the rendered source page; not code.
def set_zlib_backend(new_zlib_backend: ZLibBackendProtocol) -> None:
    """Install ``new_zlib_backend`` as the backend behind ``ZLibBackend``.

    The module-level wrapper is re-pointed in place, so every later read
    through ``ZLibBackend`` reaches the new backend.
    """
    shared_wrapper = ZLibBackend
    shared_wrapper._zlib_backend = new_zlib_backend



def encoding_to_mode(
    encoding: str | None = None,
    suppress_deflate_header: bool = False,
) -> int:
    """Translate a content encoding into a ``wbits`` value for the backend.

    Returns ``16 + MAX_WBITS`` for ``"gzip"``.  For any other encoding the
    result is ``-MAX_WBITS`` when ``suppress_deflate_header`` is true and
    ``MAX_WBITS`` otherwise.
    """
    max_wbits = ZLibBackend.MAX_WBITS
    if encoding == "gzip":
        return 16 + max_wbits
    if suppress_deflate_header:
        return -max_wbits
    return max_wbits


class DecompressionBaseHandler(ABC):
    """Base class for decompression handlers.

    Subclasses implement the blocking ``decompress_sync`` and the
    ``data_available`` property; this class supplies the async
    ``decompress`` wrapper that decides whether to run inline or in an
    executor.
    """

    def __init__(
        self,
        executor: Executor | None = None,
        max_sync_chunk_size: int | None = MAX_SYNC_CHUNK_SIZE,
    ):
        """Store the executor and the inline-processing size threshold.

        Args:
            executor: passed to ``loop.run_in_executor`` for large inputs.
            max_sync_chunk_size: inputs longer than this many bytes are
                decompressed in the executor; ``None`` disables offloading.
        """
        self._executor = executor
        self._max_sync_chunk_size = max_sync_chunk_size

    @abstractmethod
    def decompress_sync(
        self, data: Buffer, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress the given data."""

    async def decompress(
        self, data: Buffer, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress the given data.

        Inputs longer than ``max_sync_chunk_size`` are processed by
        ``decompress_sync`` in the configured executor; smaller inputs are
        decompressed inline.
        """
        if (
            self._max_sync_chunk_size is not None
            and len(data) > self._max_sync_chunk_size
        ):
            # A coroutine always has a running loop; get_running_loop() is the
            # preferred accessor here and matches ZLibCompressor.compress().
            return await asyncio.get_running_loop().run_in_executor(
                self._executor, self.decompress_sync, data, max_length
            )
        return self.decompress_sync(data, max_length)

    @property
    @abstractmethod
    def data_available(self) -> bool:
        """Return True if more output is available by passing b""."""


class ZLibCompressor:
    """Streaming compressor built on the currently configured zlib backend."""

    def __init__(
        self,
        encoding: str | None = None,
        suppress_deflate_header: bool = False,
        level: int | None = None,
        wbits: int | None = None,
        strategy: int | None = None,
        executor: Executor | None = None,
        max_sync_chunk_size: int | None = MAX_SYNC_CHUNK_SIZE,
    ):
        """Create the underlying compressor object.

        An explicit ``wbits`` takes precedence; otherwise the mode is derived
        from ``encoding`` and ``suppress_deflate_header``.  ``level`` and
        ``strategy`` are passed to the backend only when given.
        """
        self._executor = executor
        self._max_sync_chunk_size = max_sync_chunk_size
        if wbits is None:
            self._mode = encoding_to_mode(encoding, suppress_deflate_header)
        else:
            self._mode = wbits
        # Wrap whichever backend is active right now in a private wrapper.
        self._zlib_backend: Final = ZLibBackendWrapper(ZLibBackend._zlib_backend)

        options: CompressObjArgs = {"wbits": self._mode}
        if strategy is not None:
            options["strategy"] = strategy
        if level is not None:
            options["level"] = level
        self._compressor = self._zlib_backend.compressobj(**options)

    def compress_sync(self, data: Buffer) -> bytes:
        """Compress ``data`` synchronously and return the bytes produced."""
        return self._compressor.compress(data)

    async def compress(self, data: Buffer) -> bytes:
        """Compress the data and return the compressed bytes.

        flush() must be called after the last call to compress().

        If the data is larger than max_sync_chunk_size, the compression is
        done in the executor; otherwise it runs in the event loop.

        **WARNING: This method is NOT cancellation-safe when used with flush().**
        If this operation is cancelled, the compressor state may be corrupted.
        The connection MUST be closed after cancellation to avoid data
        corruption in subsequent compress operations.

        For cancellation-safe compression (e.g., WebSocket), the caller MUST
        wrap compress() + flush() + send operations in a shield and lock to
        ensure atomicity.
        """
        threshold = self._max_sync_chunk_size
        if threshold is None or len(data) <= threshold:
            return self.compress_sync(data)
        # Large payload: keep the event loop responsive by using the executor.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            self._executor, self._compressor.compress, data
        )

    def flush(self, mode: int | None = None) -> bytes:
        """Flush the compressor synchronously.

        ``mode`` defaults to the backend's ``Z_FINISH``.

        **WARNING: This method is NOT cancellation-safe when called after compress().**
        flush() accesses shared compressor state. If compress() was cancelled,
        calling flush() may result in corrupted data. The connection MUST be
        closed after compress() cancellation.

        For cancellation-safe compression (e.g., WebSocket), the caller MUST
        wrap compress() + flush() + send operations in a shield and lock to
        ensure atomicity.
        """
        if mode is None:
            mode = self._zlib_backend.Z_FINISH
        return self._compressor.flush(mode)


class ZLibDecompressor(DecompressionBaseHandler):
    """Streaming decompressor for the zlib backend with multi-member support.

    Concatenated members in one stream are handled by switching to a fresh
    backend decompressor whenever the current one reports ``eof``.
    """

    def __init__(
        self,
        encoding: str | None = None,
        suppress_deflate_header: bool = False,
        executor: Executor | None = None,
        max_sync_chunk_size: int | None = MAX_SYNC_CHUNK_SIZE,
    ):
        """Create the backend decompressor for the requested encoding."""
        super().__init__(executor=executor, max_sync_chunk_size=max_sync_chunk_size)
        self._mode = encoding_to_mode(encoding, suppress_deflate_header)
        # Wrap whichever backend is active right now in a private wrapper.
        self._zlib_backend: Final = ZLibBackendWrapper(ZLibBackend._zlib_backend)
        self._decompressor = self._zlib_backend.decompressobj(wbits=self._mode)
        # True when the most recent decompress call produced no output.
        self._last_empty = False
        # Start of the next member, held back because the output budget of the
        # previous call was already used up.
        self._pending_unused_data: bytes | None = None

    def decompress_sync(
        self, data: Buffer, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress ``data`` and return at most ``max_length`` bytes.

        ``max_length`` equal to ``ZLIB_MAX_LENGTH_UNLIMITED`` (0) means no
        limit.  Input that could not be processed within the limit is kept
        and consumed first on the next call.
        """
        if self._pending_unused_data is not None:
            data = self._pending_unused_data + bytes(data)
            self._pending_unused_data = None
        result = self._decompressor.decompress(
            self._decompressor.unconsumed_tail + data, max_length
        )
        # Only way to know that isal has no further data is checking we get no output
        self._last_empty = result == b""

        # Handle concatenated gzip/deflate streams (multi-member).
        # After a member ends, unused_data holds the start of the next member.
        # Create a fresh decompressor for each subsequent member.
        while self._decompressor.eof and self._decompressor.unused_data:
            unused = self._decompressor.unused_data
            self._decompressor = self._zlib_backend.decompressobj(wbits=self._mode)
            remaining = max_length
            if max_length != ZLIB_MAX_LENGTH_UNLIMITED:
                # ``result`` is cumulative, so derive the remaining budget
                # from the caller's original limit each time.  Decrementing
                # ``max_length`` in place would count earlier members twice.
                remaining = max_length - len(result)
                if remaining <= 0:
                    self._pending_unused_data = unused
                    break
            chunk = self._decompressor.decompress(unused, remaining)
            self._last_empty = chunk == b""
            result += chunk

        # Member ended exactly at chunk boundary — no unused_data, but the
        # next feed_data() call would fail on the spent decompressor.
        # Only reset for gzip; deflate's feed_eof() relies on eof=True to
        # confirm the stream is complete.
        if self._decompressor.eof and self._mode > self._zlib_backend.MAX_WBITS:
            self._decompressor = self._zlib_backend.decompressobj(wbits=self._mode)

        return result

    def flush(self, length: int = 0) -> bytes:
        """Flush the decompressor; ``length`` is passed on only when positive."""
        return (
            self._decompressor.flush(length)
            if length > 0
            else self._decompressor.flush()
        )

    @property
    def data_available(self) -> bool:
        """Return True if calling decompress again may yield more output."""
        return (
            bool(self._decompressor.unconsumed_tail)
            or not self._last_empty
            or self._pending_unused_data is not None
        )

    @property
    def eof(self) -> bool:
        """Return the ``eof`` flag of the current backend decompressor."""
        return self._decompressor.eof


class BrotliDecompressor(DecompressionBaseHandler):
    """Streaming Brotli decompressor."""

    # Supports both 'brotlipy' and 'Brotli' packages since they share an
    # import name: 'brotlipy' exposes ``decompress`` on the decompressor
    # object, 'Brotli' exposes ``process``.
    def __init__(
        self,
        executor: Executor | None = None,
        max_sync_chunk_size: int | None = MAX_SYNC_CHUNK_SIZE,
    ) -> None:
        """Create a Brotli decompressor.

        Raises:
            RuntimeError: if no Brotli package could be imported.
        """
        if not HAS_BROTLI:
            raise RuntimeError(
                "The brotli decompression is not available. "
                "Please install `Brotli` module"
            )
        self._obj = brotli.Decompressor()
        self._last_empty = False
        super().__init__(executor=executor, max_sync_chunk_size=max_sync_chunk_size)

    def decompress_sync(
        self, data: Buffer, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress ``data``; ``max_length`` of 0 means no output limit."""
        decoder = self._obj
        # Pick whichever entry point the installed package provides.
        feed = decoder.decompress if hasattr(decoder, "decompress") else decoder.process
        if max_length == ZLIB_MAX_LENGTH_UNLIMITED:
            output = cast(bytes, feed(data))
        else:
            output = cast(bytes, feed(data, max_length))
        # Only way to know that brotli has no further data is checking we get no output
        self._last_empty = output == b""
        return output

    def flush(self) -> bytes:
        """Flush the decompressor, or return ``b""`` if it has no ``flush``."""
        if not hasattr(self._obj, "flush"):
            return b""
        return cast(bytes, self._obj.flush())

    @property
    def data_available(self) -> bool:
        """Return True while the stream is unfinished and the last call gave output."""
        return not (self._obj.is_finished() or self._last_empty)


class ZSTDDecompressor(DecompressionBaseHandler):
    """Streaming Zstandard decompressor with multi-frame support.

    A fresh ``ZstdDecompressor`` is created whenever the current one
    reports ``eof``, so concatenated frames decode as one stream.
    """

    def __init__(
        self,
        executor: Executor | None = None,
        max_sync_chunk_size: int | None = MAX_SYNC_CHUNK_SIZE,
    ) -> None:
        """Create a zstd decompressor.

        Raises:
            RuntimeError: if no zstd implementation could be imported.
        """
        if not HAS_ZSTD:
            raise RuntimeError(
                "The zstd decompression is not available. "
                "Please install `backports.zstd` module"
            )
        self._obj = ZstdDecompressor()
        # Start of the next frame, held back because the output budget of the
        # previous call was already used up.
        self._pending_unused_data: bytes | None = None
        super().__init__(executor=executor, max_sync_chunk_size=max_sync_chunk_size)

    def decompress_sync(
        self, data: Buffer, max_length: int = ZLIB_MAX_LENGTH_UNLIMITED
    ) -> bytes:
        """Decompress ``data`` and return at most ``max_length`` bytes.

        ``max_length`` follows the zlib convention used across this module:
        ``ZLIB_MAX_LENGTH_UNLIMITED`` (0) means no limit.
        """
        # zstd uses -1 for unlimited, while zlib uses 0 for unlimited
        # Convert the zlib convention (0=unlimited) to zstd convention (-1=unlimited)
        zstd_max_length = (
            ZSTD_MAX_LENGTH_UNLIMITED
            if max_length == ZLIB_MAX_LENGTH_UNLIMITED
            else max_length
        )
        if self._pending_unused_data is not None:
            data = self._pending_unused_data + data
            self._pending_unused_data = None
        result = self._obj.decompress(data, zstd_max_length)

        # Handle multi-frame zstd streams.
        # https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1
        # ZstdDecompressor handles one frame only. When a frame ends,
        # eof becomes True and any trailing data goes to unused_data.
        # We create a fresh decompressor to continue with the next frame.
        while self._obj.eof and self._obj.unused_data:
            unused_data = self._obj.unused_data
            self._obj = ZstdDecompressor()
            remaining = zstd_max_length
            if zstd_max_length != ZSTD_MAX_LENGTH_UNLIMITED:
                # ``result`` is cumulative, so derive the remaining budget
                # from the original limit each time.  Decrementing the limit
                # in place would count earlier frames twice.
                remaining = zstd_max_length - len(result)
                if remaining <= 0:
                    self._pending_unused_data = unused_data
                    break
            result += self._obj.decompress(unused_data, remaining)

        # Frame ended exactly at chunk boundary — no unused_data, but the
        # next feed_data() call would fail on the spent decompressor.
        # Prepare a fresh one for the next chunk.
        if self._obj.eof:
            self._obj = ZstdDecompressor()

        return result

    def flush(self) -> bytes:
        """Return ``b""``; this decompressor has nothing to flush."""
        return b""

    @property
    def data_available(self) -> bool:
        """Return True if calling decompress again may yield more output."""
        return (
            not self._obj.needs_input and not self._obj.eof
        ) or self._pending_unused_data is not None