"""Incoming request contracts — :class:`RequestIX` + nested option blocks. Mirrors MVP spec §3 exactly. Dropped spec fields (e.g. ``use_vision``, ``reasoning_effort``, ``version``) are intentionally absent from this module; if a caller sends one ``RequestIX`` rejects the payload (``extra="forbid"``). The file-URL union keeps plain strings as ``str`` and dict entries as :class:`FileRef` so callers can mix auth-required URLs with plain ones in a single list without wrapping every entry. """ from __future__ import annotations from typing import Literal from pydantic import BaseModel, ConfigDict, Field class FileRef(BaseModel): """A file entry with optional auth headers and per-file size override. Used when the file URL needs authentication (e.g. Paperless ``Token``) or a tighter size cap than :envvar:`IX_FILE_MAX_BYTES`. Plain URLs that need no headers can stay as bare ``str`` values in :attr:`Context.files`. """ model_config = ConfigDict(extra="forbid") url: str headers: dict[str, str] = Field(default_factory=dict) max_bytes: int | None = None class Context(BaseModel): """Document payload: files to fetch + OCR + any pre-extracted texts.""" model_config = ConfigDict(extra="forbid") files: list[str | FileRef] = Field(default_factory=list) texts: list[str] = Field(default_factory=list) class OCROptions(BaseModel): """Knobs for the OCR step. ``service`` is kept so the adapter point stays visible in the contract even though MVP only wires Surya. Future engines plug in behind the same name. """ model_config = ConfigDict(extra="forbid") use_ocr: bool = True ocr_only: bool = False include_ocr_text: bool = False include_geometries: bool = False service: Literal["surya"] = "surya" class GenAIOptions(BaseModel): """Knobs for the LLM step.""" model_config = ConfigDict(extra="forbid") gen_ai_model_name: str | None = None class ProvenanceOptions(BaseModel): """Knobs for the provenance + reliability steps.""" model_config = ConfigDict(extra="forbid") include_provenance: bool = True max_sources_per_field: int = 10 class Options(BaseModel): """Aggregate options block nested into :class:`RequestIX`.""" model_config = ConfigDict(extra="forbid") ocr: OCROptions = Field(default_factory=OCROptions) gen_ai: GenAIOptions = Field(default_factory=GenAIOptions) provenance: ProvenanceOptions = Field(default_factory=ProvenanceOptions) class RequestIX(BaseModel): """Top-level job request. ``ix_id`` is a transport-assigned 16-char hex handle. Callers MUST NOT set it; the REST adapter / pg-queue adapter populates it on insert. The field is kept here so the contract is closed-over-construction round-trips (e.g. when the worker re-hydrates a job out of the store). """ model_config = ConfigDict(extra="forbid") use_case: str ix_client_id: str request_id: str ix_id: str | None = None context: Context options: Options = Field(default_factory=Options) callback_url: str | None = None