Adds the incoming-request data contracts as Pydantic v2 models. Matches the MVP spec §3 exactly — fields dropped from the reference spec (use_vision, reasoning_effort, version, ...) stay out, and `extra="forbid"` catches any caller that sends them so drift surfaces immediately instead of silently. Context.files is `list[str | FileRef]`: plain URLs stay str, dict entries parse as FileRef. This keeps the common case (public URL) one-liner while still supporting Paperless-style auth headers and per-file size caps. ix_id stays optional with a docstring warning that callers MUST NOT set it — the transport layer assigns the 16-char hex handle on insert. The field is present so `Job` round-trips out of the store. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
103 lines
3.1 KiB
Python
103 lines
3.1 KiB
Python
"""Incoming request contracts — :class:`RequestIX` + nested option blocks.
|
|
|
|
Mirrors MVP spec §3 exactly. Dropped spec fields (e.g. ``use_vision``,
|
|
``reasoning_effort``, ``version``) are intentionally absent from this module;
|
|
if a caller sends one, ``RequestIX`` rejects the payload (``extra="forbid"``).
|
|
|
|
The file-URL union keeps plain strings as ``str`` and dict entries as
|
|
:class:`FileRef` so callers can mix auth-required URLs with plain ones in a
|
|
single list without wrapping every entry.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Literal
|
|
|
|
from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
|
|
class FileRef(BaseModel):
    """A file entry with optional auth headers and a per-file size override.

    Use this form when the file URL needs authentication (e.g. a Paperless
    ``Token`` header) or a tighter size cap than :envvar:`IX_FILE_MAX_BYTES`.
    Plain URLs that need no headers can stay as bare ``str`` values in
    :attr:`Context.files`.

    Unknown keys are rejected (``extra="forbid"``). ``max_bytes`` must be
    strictly positive when given: a zero or negative cap could never admit a
    file, so it is refused at validation time instead of surfacing later.
    """

    model_config = ConfigDict(extra="forbid")

    # Location of the file.
    url: str
    # Optional headers for this file (e.g. auth); empty when none are needed.
    headers: dict[str, str] = Field(default_factory=dict)
    # Per-file byte cap. ``None`` means no override was supplied; any integer
    # that is supplied must be > 0.
    max_bytes: int | None = Field(default=None, gt=0)
|
|
|
|
|
|
class Context(BaseModel):
    """Document payload: files to fetch + OCR, plus any pre-extracted texts.

    Entries in ``files`` may be mixed freely: a plain URL stays a ``str``,
    while a dict entry is parsed as a :class:`FileRef`. Both lists default to
    empty, and unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Bare URL strings and/or FileRef entries, in caller-supplied order.
    files: list[str | FileRef] = Field(default_factory=list)
    # Pre-extracted text snippets supplied directly by the caller.
    texts: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class OCROptions(BaseModel):
    """Knobs for the OCR step.

    ``service`` only admits ``"surya"`` because the MVP wires a single engine;
    the field is kept so the adapter point stays visible in the contract and
    future engines can plug in behind the same name.

    Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Boolean toggles for the OCR step; only ``use_ocr`` defaults to on.
    use_ocr: bool = True
    ocr_only: bool = False
    include_ocr_text: bool = False
    include_geometries: bool = False

    # OCR engine selector; a single-value literal for now.
    service: Literal["surya"] = "surya"
|
|
|
|
|
|
class GenAIOptions(BaseModel):
    """Knobs for the LLM step.

    Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Optional model name; ``None`` when the caller does not specify one.
    gen_ai_model_name: str | None = None
|
|
|
|
|
|
class ProvenanceOptions(BaseModel):
    """Knobs for the provenance + reliability steps.

    Unknown keys are rejected (``extra="forbid"``). ``max_sources_per_field``
    must not be negative: a negative maximum has no meaning, so it is refused
    at validation time. Zero is still accepted.
    """

    model_config = ConfigDict(extra="forbid")

    # Provenance is on by default.
    include_provenance: bool = True
    # Upper bound on sources kept per field; defaults to 10, must be >= 0.
    max_sources_per_field: int = Field(default=10, ge=0)
|
|
|
|
|
|
class Options(BaseModel):
    """Aggregate options block nested into :class:`RequestIX`.

    Every sub-block is optional; an omitted one is built from its own model's
    defaults. Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # OCR step settings.
    ocr: OCROptions = Field(default_factory=OCROptions)
    # LLM step settings.
    gen_ai: GenAIOptions = Field(default_factory=GenAIOptions)
    # Provenance + reliability step settings.
    provenance: ProvenanceOptions = Field(default_factory=ProvenanceOptions)
|
|
|
|
|
|
class RequestIX(BaseModel):
    """Top-level job request.

    ``ix_id`` is a transport-assigned 16-char hex handle. Callers MUST NOT set
    it; the REST adapter / pg-queue adapter populates it on insert. The field
    exists on the model only so that a stored request round-trips through
    validation (e.g. when the worker re-hydrates a job out of the store).

    Unknown keys are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Required identifiers supplied by the caller.
    use_case: str
    ix_client_id: str
    request_id: str

    # Assigned by the transport layer, never by the caller (see docstring).
    ix_id: str | None = None

    # Document payload (required).
    context: Context
    # Per-step options; defaults to an all-defaults Options block.
    options: Options = Field(default_factory=Options)
    # Optional callback URL; ``None`` when the caller supplies none.
    callback_url: str | None = None
|