Compare commits


No commits in common. "main" and "feat/ollama-client" have entirely different histories.

48 changed files with 148 additions and 4216 deletions


@@ -4,11 +4,11 @@
# the Postgres password.
# --- Job store -----------------------------------------------------------
-IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@127.0.0.1:5431/infoxtractor
+IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@host.docker.internal:5431/infoxtractor
# --- LLM backend ---------------------------------------------------------
-IX_OLLAMA_URL=http://127.0.0.1:11434
+IX_OLLAMA_URL=http://host.docker.internal:11434
-IX_DEFAULT_MODEL=qwen3:14b
+IX_DEFAULT_MODEL=gpt-oss:20b
# --- OCR -----------------------------------------------------------------
IX_OCR_ENGINE=surya

.gitignore

@@ -15,7 +15,6 @@ dist/
build/
*.log
/tmp/
-.claude/
# uv
# uv.lock is committed intentionally for reproducible builds.


@@ -4,11 +4,7 @@ Async, on-prem, LLM-powered structured information extraction microservice. Give
Designed to be used by other on-prem services (e.g. mammon) as a reliable fallback / second opinion for format-specific deterministic parsers.
-Status: MVP deployed (2026-04-18) at `http://192.168.68.42:8994` — LAN only. Browser UI at `http://192.168.68.42:8994/ui`. Full reference spec at `docs/spec-core-pipeline.md`; MVP spec at `docs/superpowers/specs/2026-04-18-ix-mvp-design.md`; deploy runbook at `docs/deployment.md`.
+Status: design phase. Full reference spec at `docs/spec-core-pipeline.md`. MVP spec will live at `docs/superpowers/specs/`.
-Use cases: the built-in registry lives in `src/ix/use_cases/__init__.py` (`bank_statement_header` for MVP). Callers without a registered entry can ship an ad-hoc schema inline via `RequestIX.use_case_inline` (see README "Ad-hoc use cases"); the pipeline builds the Pydantic classes on the fly per request. The `/ui` page exposes this as a "custom" option so non-engineering users can experiment without a deploy.
-UX notes: the `/ui` job page surfaces queue position + elapsed MM:SS on each poll, renders the client-provided filename (stored via `FileRef.display_name`, optional metadata — the pipeline ignores it for execution), and shows a CPU-mode notice when `/healthz` reports `ocr_gpu: false`.
## Guiding Principles
@@ -29,7 +25,7 @@ UX notes: the `/ui` job page surfaces queue position + elapsed MM:SS on each pol
- **Language**: Python 3.12, asyncio
- **Web/REST**: FastAPI + uvicorn
- **OCR (pluggable)**: Surya OCR first (GPU, shares RTX 3090 with Ollama / Immich ML)
-- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `qwen3:14b`, configurable per use case
+- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `gpt-oss:20b`, configurable per use case
- **State**: Postgres on the shared `postgis` container (:5431), new `infoxtractor` database
- **Deployment**: Docker, `git push server main` → post-receive rebuild (pattern from other apps)


@@ -1,69 +0,0 @@
# InfoXtractor container image.
#
# Base image ships CUDA 12.4 runtime libraries so the Surya OCR client can
# use the RTX 3090 on the deploy host. Ubuntu 22.04 is the LTS used across
# the home-server stack (immich-ml, monitoring) so GPU drivers line up.
FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1
# --- System deps --------------------------------------------------------
# - python3.12 via deadsnakes PPA (pinned; Ubuntu 22.04 ships 3.10 only)
# - libmagic1 : python-magic backend for MIME sniffing
# - libgl1 : libGL.so needed by Pillow/OpenCV wheels used by Surya
# - libglib2.0 : shared by Pillow/PyMuPDF headless rendering
# - curl : post-receive hook's /healthz probe & general ops
# - ca-certs : httpx TLS verification
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        software-properties-common \
        ca-certificates \
        curl \
        gnupg \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        python3.12 \
        python3.12-venv \
        python3.12-dev \
        libmagic1 \
        libgl1 \
        libglib2.0-0 \
    && ln -sf /usr/bin/python3.12 /usr/local/bin/python \
    && ln -sf /usr/bin/python3.12 /usr/local/bin/python3 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# --- uv (dependency resolver used by the project) -----------------------
# Install via the standalone installer; avoids needing a working system pip
# (python3.12 on Ubuntu 22.04 has no `distutils`, which breaks Ubuntu pip).
RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
    && ln -sf /root/.local/bin/uv /usr/local/bin/uv
WORKDIR /app
# Copy dependency manifests + README early so the heavy `uv sync` layer
# caches whenever only application code changes. README.md is required
# because pyproject.toml names it as the package's readme — hatchling
# validates it exists when resolving the editable install.
COPY pyproject.toml uv.lock .python-version README.md ./
# Prod + OCR extras, no dev tooling. --frozen means "must match uv.lock";
# CI catches drift before it reaches the image.
RUN uv sync --frozen --no-dev --extra ocr
# --- Application code ---------------------------------------------------
COPY src src
COPY alembic alembic
COPY alembic.ini ./
EXPOSE 8994
# Migrations are idempotent (alembic upgrade head is a no-op on a current
# DB) so running them on every start keeps the image + DB aligned without
# an extra orchestration step.
CMD ["sh", "-c", "uv run alembic upgrade head && uv run uvicorn ix.app:create_app --factory --host 0.0.0.0 --port 8994"]


@@ -4,16 +4,10 @@ Async, on-prem, LLM-powered structured information extraction microservice.
Given a document (PDF, image, text) and a named *use case*, ix returns a structured JSON result whose shape matches the use-case schema — together with per-field provenance (OCR segment IDs, bounding boxes, cross-OCR agreement flags) that let the caller decide how much to trust each extracted value.
-**Status:** MVP deployed. Live on the home LAN at `http://192.168.68.42:8994` (REST API + browser UI at `/ui`).
+**Status:** design phase. Implementation about to start.
-## Web UI
-A minimal browser UI lives at [`http://192.168.68.42:8994/ui`](http://192.168.68.42:8994/ui): drop a PDF, pick a registered use case or define one inline, submit, see the pretty-printed result. HTMX polls the job status every 2 s until the pipeline finishes. LAN-only, no auth.
- Full reference spec: [`docs/spec-core-pipeline.md`](docs/spec-core-pipeline.md) (aspirational; MVP is a strict subset)
- **MVP design:** [`docs/superpowers/specs/2026-04-18-ix-mvp-design.md`](docs/superpowers/specs/2026-04-18-ix-mvp-design.md)
-- **Implementation plan:** [`docs/superpowers/plans/2026-04-18-ix-mvp-implementation.md`](docs/superpowers/plans/2026-04-18-ix-mvp-implementation.md)
-- **Deployment runbook:** [`docs/deployment.md`](docs/deployment.md)
- Agent / development notes: [`AGENTS.md`](AGENTS.md)
## Principles
@@ -21,75 +15,3 @@ A minimal browser UI lives at [`http://192.168.68.42:8994/ui`](http://192.168.68
- **On-prem always.** LLM = Ollama, OCR = local engines (Surya first). No OpenAI / Anthropic / Azure / AWS / cloud.
- **Grounded extraction, not DB truth.** ix returns best-effort fields + provenance; the caller decides what to trust.
- **Transport-agnostic pipeline core.** REST + Postgres-queue adapters in parallel on one job store.
## Submitting a job
```bash
curl -X POST http://192.168.68.42:8994/jobs \
-H "Content-Type: application/json" \
-d '{
"use_case": "bank_statement_header",
"ix_client_id": "mammon",
"request_id": "some-correlation-id",
"context": {
"files": [{
"url": "http://paperless.local/api/documents/42/download/",
"headers": {"Authorization": "Token …"}
}],
"texts": ["<Paperless Tesseract OCR content>"]
}
}'
# → {"job_id":"…","ix_id":"…","status":"pending"}
```
Poll `GET /jobs/{job_id}` until `status` is `done` or `error`. Optionally pass `callback_url` to receive a webhook on completion (one-shot, no retry; polling stays authoritative).
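The submit-then-poll contract above can be driven by a few lines of stdlib code. A minimal sketch (the helper name and defaults are invented; endpoint shapes follow the docs):

```python
import json
import time
import urllib.request

BASE_URL = "http://192.168.68.42:8994"  # LAN address from the docs above

def wait_for_job(job_id: str, timeout: float = 120.0, interval: float = 2.0) -> dict:
    """Poll GET /jobs/{job_id} every `interval` seconds until terminal."""
    deadline = time.monotonic() + timeout
    status = "pending"
    while time.monotonic() < deadline:
        with urllib.request.urlopen(f"{BASE_URL}/jobs/{job_id}", timeout=10) as resp:
            job = json.loads(resp.read())
        status = job["status"]
        if status in ("done", "error"):  # terminal states per the docs
            return job
        time.sleep(interval)
    raise TimeoutError(f"job {job_id} still '{status}' after {timeout}s")
```

Because polling is authoritative, a caller can use this loop even when it also registers a `callback_url`.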
### Ad-hoc use cases
For one-offs where a registered use case doesn't exist yet, ship the schema inline:
```jsonc
{
"use_case": "adhoc-invoice", // free-form label (logs/metrics only)
"use_case_inline": {
"use_case_name": "Invoice totals",
"system_prompt": "Extract vendor and total amount.",
"fields": [
{"name": "vendor", "type": "str", "required": true},
{"name": "total", "type": "decimal"},
{"name": "currency", "type": "str", "choices": ["USD", "EUR", "CHF"]}
]
},
// ...ix_client_id, request_id, context...
}
```
When `use_case_inline` is set, the pipeline builds the response schema on the fly and skips the registry. Supported types: `str`, `int`, `float`, `decimal`, `date`, `datetime`, `bool`. `choices` is only allowed on `str` fields. Precedence: inline wins over `use_case` when both are present.
Full REST surface + provenance response shape documented in the MVP design spec.
## Running locally
```bash
uv sync --extra dev
uv run pytest tests/unit -v # hermetic unit + integration suite
IX_TEST_OLLAMA=1 uv run pytest tests/live -v # needs LAN access to Ollama + GPU
```
### UI queue + progress UX
The `/ui` job page polls `GET /ui/jobs/{id}/fragment` every 2 s and surfaces:
- **Queue position** while pending: "Queue position: N ahead — M jobs total in flight (single worker)" so it's obvious a new submission is waiting on an earlier job rather than stuck. "About to start" when the worker has just freed up.
- **Elapsed time** while running ("Running for MM:SS") and on finish ("Finished in MM:SS").
- **Original filename** — the UI stashes the client-provided upload name in `FileRef.display_name` so the browser shows `your_statement.pdf` instead of the on-disk UUID.
- **CPU-mode notice** when `/healthz` reports `ocr_gpu: false` (the Surya OCR client observed `torch.cuda.is_available() == False`): a collapsed `<details>` pointing at the deployment runbook.
## Deploying
```bash
git push server main # rebuilds Docker image, restarts container, /healthz deploy gate
python scripts/e2e_smoke.py # E2E acceptance against the live service
```
See [`docs/deployment.md`](docs/deployment.md) for full runbook + rollback.


@@ -1,42 +0,0 @@
# InfoXtractor Docker Compose stack.
#
# Single service. Uses host networking so the container can reach:
# - Ollama at 127.0.0.1:11434
# - postgis at 127.0.0.1:5431 (bound to loopback only; security hardening)
# Both services are LAN-hardened on the host and never exposed publicly,
# so host-network access stays on-prem. This matches the `goldstein`
# container pattern on the same server.
#
# The GPU reservation block matches immich-ml / the shape Docker Compose
# expects for GPU allocation on this host.
name: infoxtractor
services:
  infoxtractor:
    build: .
    container_name: infoxtractor
    network_mode: host
    restart: always
    env_file: .env
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: 1
              capabilities: [gpu]
    volumes:
      # Persist Surya (datalab) + HuggingFace model caches so rebuilds don't
      # re-download ~1.5 GB of weights every time.
      - ix_surya_cache:/root/.cache/datalab
      - ix_hf_cache:/root/.cache/huggingface
    labels:
      infrastructure.web_url: "http://192.168.68.42:8994"
      backup.enable: "true"
      backup.type: "postgres"
      backup.name: "infoxtractor"
volumes:
  ix_surya_cache:
  ix_hf_cache:


@@ -1,153 +0,0 @@
# Deployment
On-prem deploy to `192.168.68.42`. Push-to-deploy via a bare git repo + `post-receive` hook that rebuilds the Docker Compose stack. Pattern mirrors mammon and unified_messaging.
## Topology
```
Mac (dev)
  │ git push server main
192.168.68.42:/home/server/Public/infoxtractor/repos.git (bare)
  │ post-receive → GIT_WORK_TREE=/…/app git checkout -f main
  │ docker compose up -d --build
  │ curl /healthz (60 s gate)
Docker container `infoxtractor` (port 8994)
  ├─ 127.0.0.1:11434 → Ollama (qwen3:14b; host-network mode)
  └─ 127.0.0.1:5431 → postgis (database `infoxtractor`; host-network mode)
```
## One-time server setup
Run **once** from the Mac. Idempotent.
```bash
export IX_POSTGRES_PASSWORD=<generate-a-strong-one>
./scripts/setup_server.sh
```
The script:
1. Creates `/home/server/Public/infoxtractor/repos.git` (bare) + `/home/server/Public/infoxtractor/app/` (worktree).
2. Installs the `post-receive` hook (see `scripts/setup_server.sh` for the template).
3. Creates the `infoxtractor` Postgres role + database on the shared `postgis` container.
4. Writes `/home/server/Public/infoxtractor/app/.env` (mode 0600) from `.env.example` with the password substituted in.
5. Verifies `qwen3:14b` is pulled in Ollama.
6. Prints a hint to open UFW for port 8994 on the LAN subnet if it's missing.
After the script finishes, add the deploy remote to the local repo:
```bash
git remote add server ssh://server@192.168.68.42/home/server/Public/infoxtractor/repos.git
```
## Normal deploy workflow
```bash
# after merging a feat branch into main
git push server main
# tail the server's deploy log
ssh server@192.168.68.42 "tail -f /tmp/infoxtractor-deploy.log"
# healthz gate (the post-receive hook also waits up to 60 s for this)
curl http://192.168.68.42:8994/healthz
# end-to-end smoke — this IS the real acceptance test
python scripts/e2e_smoke.py
```
If the post-receive hook exits non-zero (`/healthz` never reaches 200), the deploy is considered failed. `docker compose up -d --build` first builds the new image and only swaps containers if the build succeeds, so a build failure leaves the previous container running; if the build succeeds but the new container never passes `/healthz`, it stays up but broken. Investigate with `docker compose logs --tail 200` in `${APP_DIR}` and either fix forward or revert (see below).
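The hook's 60 s `/healthz` gate amounts to a retry loop. A Python sketch of the same logic (the real hook uses `curl` in shell; the helper name and defaults here are illustrative):

```python
import time
import urllib.error
import urllib.request

def wait_healthy(url: str = "http://192.168.68.42:8994/healthz",
                 timeout: float = 60.0, interval: float = 2.0) -> bool:
    """Return True iff /healthz answers 200 within `timeout` seconds."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(url, timeout=5) as resp:
                if resp.status == 200:
                    return True
        except (urllib.error.URLError, OSError):
            pass  # container still starting; keep polling
        time.sleep(interval)
    return False
```

A `False` return corresponds to the hook's non-zero exit, i.e. a failed deploy.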
## Rollback
Never force-push `main`. Rollbacks happen as **forward commits** via `git revert`:
```bash
git revert HEAD # creates a revert commit for the last change
git push forgejo main
git push server main
```
## First deploy
- **Date:** 2026-04-18
- **Commit:** `fix/ollama-extract-json` (#36, the last of several Docker/ops follow-ups after PR #27 shipped the initial Dockerfile)
- **`/healthz`:** all three probes (`postgres`, `ollama`, `ocr`) green. First-pass took ~7 min for the fresh container because Surya's recognition (1.34 GB) + detection (73 MB) models download from HuggingFace on first run; subsequent rebuilds reuse the named volumes declared in `docker-compose.yml` and come up in <30 s.
- **E2E extraction:** `bank_statement_header` against `tests/fixtures/synthetic_giro.pdf` with Paperless-style texts:
- Pipeline completes in **35 s**.
- Extracted: `bank_name=DKB`, `account_iban=DE89370400440532013000`, `currency=EUR`, `opening_balance=1234.56`, `closing_balance=1450.22`, `statement_date=2026-03-31`, `statement_period_end=2026-03-31`, `statement_period_start=2026-03-01`, `account_type=null`.
- Provenance: 8 / 9 leaf fields have sources; 7 / 8 `provenance_verified` and `text_agreement` are True. `statement_period_start` shows up in the OCR but normalisation fails (dateutil picks a different interpretation of the cited day); to be chased in a follow-up.
### Docker-ops follow-ups that landed during the first deploy
All small, each merged as its own PR. In commit order after the scaffold (#27):
- **#31** `fix(docker): uv via standalone installer` — Python 3.12 on Ubuntu 22.04 drops `distutils`; Ubuntu's pip needed it. Switched to the `uv` standalone installer, which has no pip dependency.
- **#32** `fix(docker): include README.md in the uv sync COPY` — `hatchling` validates the readme file exists when resolving the editable project install.
- **#33** `fix(compose): drop runtime: nvidia` — the deploy host's Docker daemon doesn't register a named `nvidia` runtime; `deploy.resources.devices` is sufficient and matches immich-ml.
- **#34** `fix(deploy): network_mode: host` — `postgis` is bound to `127.0.0.1` on the host (security hardening T12). `host.docker.internal` points at the bridge gateway, not loopback, so the container couldn't reach postgis. Goldstein uses the same pattern.
- **#35** `fix(deps): pin surya-ocr ^0.17` — earlier cu124 torch pin had forced surya to 0.14.1, which breaks our `surya.foundation` import and needs a transformers version that lacks `QuantizedCacheConfig`.
- **#36** `fix(genai): drop Ollama format flag; extract trailing JSON` — Ollama 0.11.8 segfaults on Pydantic JSON Schemas (`$ref`, `anyOf`, `pattern`), and `format="json"` terminates reasoning models (qwen3) at `{}` because their `<think>…</think>` chain-of-thought isn't valid JSON. Omit the flag, inject the schema into the system prompt, extract the outermost `{…}` balanced block from the response.
- **volumes** — named `ix_surya_cache` + `ix_hf_cache` mount `/root/.cache/datalab` + `/root/.cache/huggingface` so rebuilds don't re-download ~1.5 GB of model weights.
Production notes:
- `IX_DEFAULT_MODEL=qwen3:14b` (already pulled on the host). Spec listed `gpt-oss:20b` as a concrete example; swapped to keep the deploy on-prem without an extra `ollama pull`.
- Torch 2.11 default cu13 wheels fall back to CPU against the host's CUDA 12.4 driver — Surya runs on CPU. Expected inference times: seconds per page. Upgrading the NVIDIA driver (or pinning a cu12-compatible torch wheel newer than 2.7) will unlock GPU with no code changes.
## E2E smoke test (`scripts/e2e_smoke.py`)
What it does (from the Mac):
1. Checks `/healthz`.
2. Starts a tiny HTTP server on the Mac's LAN IP serving `tests/fixtures/synthetic_giro.pdf`.
3. Submits a `POST /jobs` with `use_case=bank_statement_header`, the fixture URL in `context.files`, and a Paperless-style OCR text in `context.texts` (to exercise the `text_agreement` cross-check).
4. Polls `GET /jobs/{id}` every 2 s until terminal or 120 s timeout.
5. Asserts: `status=="done"`, `bank_name` non-empty, `provenance.fields["result.closing_balance"].provenance_verified=True`, `text_agreement=True`, total elapsed `< 60s`.
Non-zero exit means the deploy is not healthy. Roll back via `git revert HEAD`.
## Operational checklists
### After `ollama pull` on the host
The `IX_DEFAULT_MODEL` env var on the server's `.env` must match something in `ollama list`. Changing the default means:
1. Edit `/home/server/Public/infoxtractor/app/.env` → `IX_DEFAULT_MODEL=<new>`.
2. `docker compose --project-directory /home/server/Public/infoxtractor/app restart`.
3. `curl http://192.168.68.42:8994/healthz` → confirm `ollama: ok`.
### If `/healthz` shows `ollama: degraded`
`qwen3:14b` (or the configured default) is not pulled. On the host:
```bash
ssh server@192.168.68.42 "docker exec ollama ollama pull qwen3:14b"
```
### If `/healthz` shows `ocr: fail`
Surya couldn't initialize (model missing, CUDA unavailable, OOM). First run can be slow — models download on first call. Check container logs:
```bash
ssh server@192.168.68.42 "docker logs infoxtractor --tail 200"
```
### If the container fails to start
```bash
ssh server@192.168.68.42 "tail -100 /tmp/infoxtractor-deploy.log"
ssh server@192.168.68.42 "docker compose -f /home/server/Public/infoxtractor/app/docker-compose.yml logs --tail 200"
```
## Monitoring
- Monitoring dashboard auto-discovers via the `infrastructure.web_url` label on the container: `http://192.168.68.42:8001` → "infoxtractor" card.
- Backup opt-in via `backup.enable=true` + `backup.type=postgres` + `backup.name=infoxtractor` labels. The daily backup script picks up the `infoxtractor` Postgres database automatically.
## Ports
| Port | Direction | Source | Service |
|------|-----------|--------|---------|
| 8994/tcp | ALLOW | 192.168.68.0/24 | ix REST + healthz (LAN only; not publicly exposed) |
No VPS Caddy entry; no `infrastructure.docs_url` label — this is an internal service.


@@ -85,7 +85,6 @@ class FileRef(BaseModel):
    url: str                          # http(s):// or file://
    headers: dict[str, str] = {}      # e.g. {"Authorization": "Token …"}
    max_bytes: Optional[int] = None   # per-file override; defaults to IX_FILE_MAX_BYTES
-    display_name: Optional[str] = None  # UI-only metadata; client-provided filename for display (pipeline ignores)
class Options(BaseModel):
    ocr: OCROptions = OCROptions()
@@ -109,25 +108,6 @@ class ProvenanceOptions(BaseModel):
**Dropped from spec (no-ops under MVP):** `OCROptions.computer_vision_scaling_factor`, `include_page_tags` (always on), `GenAIOptions.use_vision`/`vision_scaling_factor`/`vision_detail`/`reasoning_effort`, `ProvenanceOptions.granularity`/`include_bounding_boxes`/`source_type`/`min_confidence`, `RequestIX.version`.
**Ad-hoc use cases (post-MVP add-on).** `RequestIX` carries an optional `use_case_inline: InlineUseCase | None = None`. When set, the pipeline builds the `(Request, Response)` Pydantic class pair on the fly from that inline definition and **skips the registry lookup entirely** — the `use_case` field becomes a free-form label (still required for metrics / logging). Inline definitions look like:
```python
class UseCaseFieldDef(BaseModel):
    name: str                         # valid Python identifier
    type: Literal["str", "int", "float", "decimal", "date", "datetime", "bool"]
    required: bool = False
    description: str | None = None
    choices: list[str] | None = None  # str-typed fields only; builds Literal[*choices]

class InlineUseCase(BaseModel):
    use_case_name: str
    system_prompt: str
    default_model: str | None = None
    fields: list[UseCaseFieldDef]
Precedence: `use_case_inline` wins when both are set. Structural errors (dup field name, invalid identifier, `choices` on a non-str type, empty fields list) raise `IX_001_001` (same code as registry miss). The builder lives in `ix.use_cases.inline.build_use_case_classes` and returns fresh classes per call — the pipeline never caches them.
### ResponseIX
Identical to spec §2.2 except `FieldProvenance` gains two fields:
@@ -226,15 +206,14 @@ Callers that prefer direct SQL (the `pg_queue_adapter` contract): insert a row w
| `POST` | `/jobs` | Body = `RequestIX` (+ optional `callback_url`). → `201 {job_id, ix_id, status: "pending"}`. Idempotent on `(ix_client_id, request_id)` — same pair returns the existing `job_id` with `200`. |
| `GET` | `/jobs/{job_id}` | → full `Job`. Source of truth regardless of submission path or callback outcome. |
| `GET` | `/jobs?client_id=…&request_id=…` | Lookup-by-correlation (caller idempotency helper). The pair is UNIQUE in the table → at most one match. Returns the job or `404`. |
-| `GET` | `/healthz` | `{postgres, ollama, ocr, ocr_gpu}`. See below for semantics. Used by `infrastructure` monitoring dashboard. `ocr_gpu` is additive metadata (not part of the gate). |
+| `GET` | `/healthz` | `{postgres, ollama, ocr}`. See below for semantics. Used by `infrastructure` monitoring dashboard. |
| `GET` | `/metrics` | Counters over the last 24 hours: `jobs_pending`, `jobs_running`, `jobs_done_24h`, `jobs_error_24h`, per-use-case avg seconds over the same window. Plain JSON, no Prometheus format for MVP. |
**`/healthz` semantics:**
- `postgres`: `SELECT 1` on the job store pool; `ok` iff the query returns within 2 s.
- `ollama`: `GET {IX_OLLAMA_URL}/api/tags` within 5 s; `ok` iff reachable AND the default model (`IX_DEFAULT_MODEL`) is listed in the tags response; `degraded` iff reachable but the model is missing (ops action: run `ollama pull <model>` on the host); `fail` on any other error.
- `ocr`: `SuryaOCRClient.selfcheck()` — returns `ok` iff CUDA is available and the Surya text-recognition model is loaded into GPU memory at process start. `fail` on any error.
-- `ocr_gpu`: `true | false | null`. Additive metadata: reports whether the OCR client observed `torch.cuda.is_available() == True` at first warm-up. `null` means not yet probed (fresh process, fake client, etc.). The UI reads this to surface a CPU-mode slowdown notice; never part of the 200/503 gate.
-- Overall HTTP status: `200` iff all three core statuses (`postgres`, `ollama`, `ocr`) are `ok`; `503` otherwise. `ocr_gpu` does not affect the gate. The monitoring dashboard only surfaces `200`/`non-200`.
+- Overall HTTP status: `200` iff all three are `ok`; `503` otherwise. The monitoring dashboard only surfaces `200`/`non-200`.
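The `ollama` probe semantics above can be sketched as follows (illustrative helper, not the service code; `/api/tags` is Ollama's model-listing endpoint):

```python
import json
import urllib.error
import urllib.request

def check_ollama(base_url: str, default_model: str) -> str:
    """Classify the Ollama probe as ok / degraded / fail per the semantics above."""
    try:
        with urllib.request.urlopen(f"{base_url}/api/tags", timeout=5) as resp:
            tags = json.loads(resp.read())
    except (urllib.error.URLError, OSError, TimeoutError):
        return "fail"  # unreachable or timed out
    models = {m.get("name", "") for m in tags.get("models", [])}
    # Reachable but default model missing → degraded (ops: `ollama pull <model>`).
    return "ok" if default_model in models else "degraded"
```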
**Callback delivery** (when `callback_url` is set): one POST of the full `Job` body, 10 s timeout. 2xx → `callback_status='delivered'`. Anything else → `'failed'`. No retry. Callers always have `GET /jobs/{id}` as the authoritative fallback.


@@ -1,8 +1,6 @@
[project]
name = "infoxtractor"
version = "0.1.0"
-# Released 2026-04-18 with the first live deploy of the MVP. See
-# docs/deployment.md §"First deploy" for the commit + /healthz times.
description = "Async on-prem LLM-powered structured information extraction microservice"
readme = "README.md"
requires-python = ">=3.12"
@@ -29,24 +27,14 @@ dependencies = [
    "pillow>=10.2,<11.0",
    "python-magic>=0.4.27",
    "python-dateutil>=2.9",
-    # UI (HTMX + Jinja2 templates served from /ui). Both arrive as transitive
-    # deps via FastAPI/Starlette already, but we pin explicitly so the import
-    # surface is owned by us. python-multipart backs FastAPI's `Form()` /
-    # `UploadFile` parsing — required by `/ui/jobs` submissions.
-    "jinja2>=3.1",
-    "aiofiles>=24.1",
-    "python-multipart>=0.0.12",
]
[project.optional-dependencies]
ocr = [
-    # Real OCR engine. Kept optional so CI (no GPU) can install the base
-    # package without the model deps.
-    # surya >= 0.17 is required: the client code uses the
-    # `surya.foundation` module, which older releases don't expose.
-    "surya-ocr>=0.17,<0.18",
-    "torch>=2.7",
+    # Real OCR engine — pulls torch + CUDA wheels. Kept optional so CI
+    # (no GPU) can install the base package without the model deps.
+    "surya-ocr>=0.9",
+    "torch>=2.4",
]
dev = [
    "pytest>=8.3",
@@ -56,11 +44,6 @@ dev = [
    "mypy>=1.13",
]
-# Note: the default pypi torch ships cu13 wheels, which emit a
-# UserWarning and fall back to CPU against the deploy host's CUDA 12.4
-# driver. Surya then runs on CPU — slower but correct for MVP. A future
-# driver upgrade unlocks GPU Surya with no code changes.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"


@@ -1,210 +0,0 @@
"""End-to-end smoke test against the deployed infoxtractor service.
Uploads a synthetic bank-statement fixture, polls for completion, and asserts
the provenance flags per spec §12 E2E. Intended to run from the Mac after
every `git push server main` as the deploy gate.
Prerequisites:
- The service is running and reachable at --base-url (default
  http://192.168.68.42:8994).
- The fixture `tests/fixtures/synthetic_giro.pdf` is present.
- The Mac and the server are on the same LAN (the server must be able to
  reach the Mac to download the fixture).

Exit codes:
    0  all assertions passed within the timeout
    1  at least one assertion failed
    2  the job never reached a terminal state in time
    3  the service was unreachable or returned an unexpected error

Usage:
    python scripts/e2e_smoke.py
    python scripts/e2e_smoke.py --base-url http://localhost:8994
"""
from __future__ import annotations
import argparse
import http.server
import json
import socket
import socketserver
import sys
import threading
import time
import urllib.error
import urllib.request
import uuid
from pathlib import Path
DEFAULT_BASE_URL = "http://192.168.68.42:8994"
FIXTURE = Path(__file__).parent.parent / "tests" / "fixtures" / "synthetic_giro.pdf"
TIMEOUT_SECONDS = 120
POLL_INTERVAL_SECONDS = 2
def find_lan_ip() -> str:
"""Return the Mac's LAN IP that the server can reach."""
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
# 192.168.68.42 is the server; getting the default route towards it
# yields the NIC with the matching subnet.
s.connect(("192.168.68.42", 80))
return s.getsockname()[0]
finally:
s.close()
def serve_fixture_in_background(fixture: Path) -> tuple[str, threading.Event]:
"""Serve the fixture on a temporary HTTP server; return the URL and a stop event."""
if not fixture.exists():
print(f"FIXTURE MISSING: {fixture}", file=sys.stderr)
sys.exit(3)
directory = fixture.parent
filename = fixture.name
lan_ip = find_lan_ip()
class Handler(http.server.SimpleHTTPRequestHandler):
def __init__(self, *args, **kwargs):
super().__init__(*args, directory=str(directory), **kwargs)
def log_message(self, format: str, *args) -> None: # quiet
pass
# Pick any free port.
httpd = socketserver.TCPServer((lan_ip, 0), Handler)
port = httpd.server_address[1]
url = f"http://{lan_ip}:{port}/{filename}"
stop = threading.Event()
def _serve():
try:
while not stop.is_set():
httpd.handle_request()
finally:
httpd.server_close()
# Run in a thread. Use a loose timeout so handle_request returns when stop is set.
httpd.timeout = 0.5
t = threading.Thread(target=_serve, daemon=True)
t.start()
return url, stop
def post_job(base_url: str, file_url: str, client_id: str, request_id: str) -> dict:
# Include a Paperless-style OCR of the fixture as context.texts so the
# text_agreement cross-check has something to compare against.
paperless_text = (
"DKB\n"
"DE89370400440532013000\n"
"Statement period: 01.03.2026 - 31.03.2026\n"
"Opening balance: 1234.56 EUR\n"
"Closing balance: 1450.22 EUR\n"
"31.03.2026\n"
)
payload = {
"use_case": "bank_statement_header",
"ix_client_id": client_id,
"request_id": request_id,
"context": {
"files": [file_url],
"texts": [paperless_text],
},
}
req = urllib.request.Request(
f"{base_url}/jobs",
data=json.dumps(payload).encode("utf-8"),
headers={"Content-Type": "application/json"},
method="POST",
)
with urllib.request.urlopen(req, timeout=10) as resp:
return json.loads(resp.read().decode("utf-8"))
def get_job(base_url: str, job_id: str) -> dict:
req = urllib.request.Request(f"{base_url}/jobs/{job_id}")
with urllib.request.urlopen(req, timeout=10) as resp:
return json.loads(resp.read().decode("utf-8"))
def main() -> int:
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
parser.add_argument("--timeout", type=int, default=TIMEOUT_SECONDS)
args = parser.parse_args()
# Sanity-check the service is up.
try:
with urllib.request.urlopen(f"{args.base_url}/healthz", timeout=5) as resp:
health = json.loads(resp.read().decode("utf-8"))
print(f"healthz: {health}")
except urllib.error.URLError as e:
print(f"service unreachable: {e}", file=sys.stderr)
return 3
fixture_url, stop_server = serve_fixture_in_background(FIXTURE)
print(f"serving fixture at {fixture_url}")
try:
client_id = "e2e_smoke"
request_id = f"smoke-{uuid.uuid4().hex[:8]}"
submit = post_job(args.base_url, fixture_url, client_id, request_id)
job_id = submit["job_id"]
print(f"submitted job_id={job_id}")
started = time.monotonic()
last_status = None
job = None
while time.monotonic() - started < args.timeout:
job = get_job(args.base_url, job_id)
if job["status"] != last_status:
print(f"[{time.monotonic() - started:5.1f}s] status={job['status']}")
last_status = job["status"]
if job["status"] in ("done", "error"):
break
time.sleep(POLL_INTERVAL_SECONDS)
else:
print(f"FAIL: timed out after {args.timeout}s", file=sys.stderr)
return 2
assert job is not None
failed = []
if job["status"] != "done":
failed.append(f"status={job['status']!r} (want 'done')")
response = job.get("response") or {}
if response.get("error"):
failed.append(f"response.error={response['error']!r}")
result = (response.get("ix_result") or {}).get("result") or {}
bank = result.get("bank_name")
if not isinstance(bank, str) or not bank.strip():
failed.append(f"bank_name={bank!r} (want non-empty string)")
fields = (response.get("provenance") or {}).get("fields") or {}
closing = fields.get("result.closing_balance") or {}
if not closing.get("provenance_verified"):
failed.append(f"closing_balance.provenance_verified={closing.get('provenance_verified')!r}")
if closing.get("text_agreement") is not True:
failed.append(f"closing_balance.text_agreement={closing.get('text_agreement')!r} (Paperless-style text submitted)")
elapsed = time.monotonic() - started
if elapsed >= 60:
failed.append(f"elapsed={elapsed:.1f}s (≥ 60s; slow path)")
print(json.dumps(result, indent=2, default=str))
if failed:
print("\n".join(f"FAIL: {f}" for f in failed), file=sys.stderr)
return 1
print(f"\nPASS in {elapsed:.1f}s")
return 0
finally:
stop_server.set()
if __name__ == "__main__":
sys.exit(main())
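The exit-code contract in the module docstring is what a CI deploy gate would branch on. A hedged sketch of such a wrapper (`run_smoke` and `VERDICTS` are invented names, not part of the repo):

```python
import subprocess
import sys

# The four documented exit codes of scripts/e2e_smoke.py.
VERDICTS = {
    0: "pass",
    1: "assertion failure",
    2: "timeout (job never reached a terminal state)",
    3: "service unreachable",
}


def run_smoke(base_url: str) -> str:
    """Run the smoke test and translate its documented exit code."""
    proc = subprocess.run(
        [sys.executable, "scripts/e2e_smoke.py", "--base-url", base_url]
    )
    return VERDICTS.get(proc.returncode, f"unknown exit code {proc.returncode}")
```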
@ -1,127 +0,0 @@
#!/usr/bin/env bash
# One-shot server setup for InfoXtractor. Idempotent: safe to re-run.
#
# Run from the Mac:
# IX_POSTGRES_PASSWORD=<pw> ./scripts/setup_server.sh
#
# What it does on 192.168.68.42:
#   1. Creates the bare git repo `/home/server/Public/infoxtractor/repos.git`
#      if missing, and writes (or updates) the executable post-receive hook.
#   2. Verifies `qwen3:14b` is pulled in Ollama.
#   3. Creates the Postgres role + database on the shared `postgis` container.
#   4. Writes `/home/server/Public/infoxtractor/app/.env` (0600) from .env.example.
#   5. Checks the UFW rule for port 8994 (LAN only).
set -euo pipefail
SERVER="${IX_SERVER:-server@192.168.68.42}"
APP_BASE="/home/server/Public/infoxtractor"
REPOS_GIT="${APP_BASE}/repos.git"
APP_DIR="${APP_BASE}/app"
DB_NAME="infoxtractor"
DB_USER="infoxtractor"
if [ -z "${IX_POSTGRES_PASSWORD:-}" ]; then
read -r -s -p "Postgres password for role '${DB_USER}': " IX_POSTGRES_PASSWORD
echo
fi
if [ -z "${IX_POSTGRES_PASSWORD}" ]; then
echo "IX_POSTGRES_PASSWORD is required." >&2
exit 1
fi
echo "==> 1/5 Ensuring bare repo + post-receive hook on ${SERVER}"
ssh "${SERVER}" bash -s <<EOF
set -euo pipefail
mkdir -p "${REPOS_GIT}" "${APP_DIR}"
if [ ! -f "${REPOS_GIT}/HEAD" ]; then
git init --bare "${REPOS_GIT}"
fi
cat >"${REPOS_GIT}/hooks/post-receive" <<'HOOK'
#!/usr/bin/env bash
set -eo pipefail
APP_DIR="${APP_DIR}"
LOG="/tmp/infoxtractor-deploy.log"
echo "[\$(date -u '+%FT%TZ')] post-receive start" >> "\$LOG"
mkdir -p "\$APP_DIR"
GIT_WORK_TREE="\$APP_DIR" git --git-dir="${REPOS_GIT}" checkout -f main >> "\$LOG" 2>&1
cd "\$APP_DIR"
docker compose up -d --build >> "\$LOG" 2>&1
# Deploy gate: /healthz must return 200 within 60 s.
for i in \$(seq 1 30); do
if curl -fsS http://localhost:8994/healthz > /dev/null 2>&1; then
echo "[\$(date -u '+%FT%TZ')] healthz OK" >> "\$LOG"
exit 0
fi
sleep 2
done
echo "[\$(date -u '+%FT%TZ')] healthz never reached OK" >> "\$LOG"
docker compose logs --tail 100 >> "\$LOG" 2>&1 || true
exit 1
HOOK
chmod +x "${REPOS_GIT}/hooks/post-receive"
EOF
echo "==> 2/5 Verifying Ollama has qwen3:14b pulled"
if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'qwen3:14b'"; then
echo "FAIL: qwen3:14b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull qwen3:14b'" >&2
exit 1
fi
echo "==> 3/5 Creating Postgres role '${DB_USER}' and database '${DB_NAME}' on postgis container"
# Idempotent via DO blocks; uses docker exec to avoid needing psql on the host.
ssh "${SERVER}" bash -s <<EOF
set -euo pipefail
docker exec -i postgis psql -U postgres <<SQL
DO \\\$\\\$
BEGIN
IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '${DB_USER}') THEN
CREATE ROLE ${DB_USER} LOGIN PASSWORD '${IX_POSTGRES_PASSWORD}';
ELSE
ALTER ROLE ${DB_USER} WITH PASSWORD '${IX_POSTGRES_PASSWORD}';
END IF;
END
\\\$\\\$;
SQL
if ! docker exec -i postgis psql -U postgres -tc "SELECT 1 FROM pg_database WHERE datname = '${DB_NAME}'" | grep -q 1; then
docker exec -i postgis createdb -U postgres -O ${DB_USER} ${DB_NAME}
fi
EOF
echo "==> 4/5 Writing ${APP_DIR}/.env on the server"
# Render .env from the repo's .env.example, substituting the password placeholder.
LOCAL_ENV_CONTENT="$(
sed "s#<password>#${IX_POSTGRES_PASSWORD}#g" \
"$(dirname "$0")/../.env.example"
)"
# IX_TEST_MODE is deliberately left unset so production wiring is used
# (fake mode stays off).
# .env is written atomically and permissioned 0600.
ssh "${SERVER}" "install -d -m 0755 '${APP_DIR}' && cat > '${APP_DIR}/.env' <<'ENVEOF'
${LOCAL_ENV_CONTENT}
ENVEOF
chmod 0600 '${APP_DIR}/.env'"
echo "==> 5/5 Checking UFW rule for port 8994 (LAN only)"
ssh "${SERVER}" "sudo ufw status numbered | grep -F 8994" >/dev/null 2>&1 || {
echo "NOTE: UFW doesn't yet allow 8994. Run on the server:"
echo " sudo ufw allow from 192.168.68.0/24 to any port 8994 proto tcp"
}
echo
echo "Done."
echo
echo "Next steps (on the Mac):"
echo " git remote add server ssh://server@192.168.68.42${REPOS_GIT}"
echo " git push server main"
echo " ssh ${SERVER} 'tail -f /tmp/infoxtractor-deploy.log'"
echo " curl http://192.168.68.42:8994/healthz"
echo " python scripts/e2e_smoke.py"
@ -18,7 +18,7 @@ from __future__ import annotations
 
 import asyncio
 from collections.abc import Callable
-from dataclasses import dataclass, field
+from dataclasses import dataclass
 from datetime import UTC, datetime, timedelta
 from typing import Annotated, Literal
 from uuid import UUID
@ -44,15 +44,10 @@ class Probes:
     keeping them sync lets tests pass plain lambdas. Real probes that need
     async work run the call through ``asyncio.run_in_executor`` inside the
     callable (Chunk 4).
-
-    ``ocr_gpu`` is additive metadata for the UI (not a health gate): returns
-    ``True`` iff the OCR client reports CUDA is available, ``False`` for
-    explicit CPU-mode, ``None`` if unknown (fake client, not yet warmed up).
     """
 
     ollama: Callable[[], Literal["ok", "degraded", "fail"]]
     ocr: Callable[[], Literal["ok", "fail"]]
-    ocr_gpu: Callable[[], bool | None] = field(default=lambda: None)
 
 
 def get_session_factory_dep() -> async_sessionmaker[AsyncSession]:
@ -168,16 +163,8 @@ async def healthz(
     except Exception:
         ocr_state = "fail"
 
-    try:
-        ocr_gpu_state: bool | None = probes.ocr_gpu()
-    except Exception:
-        ocr_gpu_state = None
-
     body = HealthStatus(
-        postgres=postgres_state,
-        ollama=ollama_state,
-        ocr=ocr_state,
-        ocr_gpu=ocr_gpu_state,
+        postgres=postgres_state, ollama=ollama_state, ocr=ocr_state
     )
     if postgres_state != "ok" or ollama_state != "ok" or ocr_state != "ok":
         response.status_code = 503
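The aggregation rule in this hunk — 503 unless every core probe reports "ok", with a crashing probe collapsing to "fail" — can be sketched standalone (`ProbeSet` and `health_gate` are simplified stand-ins, not the repo's names):

```python
from dataclasses import dataclass
from typing import Callable, Literal


@dataclass
class ProbeSet:
    """Simplified stand-in for the Probes dataclass: plain sync callables."""
    ollama: Callable[[], Literal["ok", "degraded", "fail"]]
    ocr: Callable[[], Literal["ok", "fail"]]


def health_gate(postgres: str, probes: ProbeSet) -> tuple[int, dict[str, str]]:
    """Return (http_status, body): 200 only when all three report "ok"."""
    try:
        ollama = probes.ollama()
    except Exception:
        ollama = "fail"  # a crashing probe degrades status, never crashes the endpoint
    try:
        ocr = probes.ocr()
    except Exception:
        ocr = "fail"
    body = {"postgres": postgres, "ollama": ollama, "ocr": ocr}
    status = 200 if (postgres, ollama, ocr) == ("ok", "ok", "ok") else 503
    return status, body
```

Note how "degraded" is enough to flip the gate to 503, matching the docstring's claim that monitoring surfaces a missing model as non-200.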
@ -28,15 +28,9 @@ class HealthStatus(BaseModel):
     """Body of GET /healthz.
 
     Each field reports per-subsystem state. Overall HTTP status is 200 iff
-    every of the three core status keys is ``"ok"`` (spec §5). ``ollama`` can
-    be ``"degraded"`` when the backend is reachable but the default model
-    isn't pulled — monitoring surfaces that as non-200.
-
-    ``ocr_gpu`` is additive metadata, not part of the health gate: it reports
-    whether the Surya OCR client observed ``torch.cuda.is_available() == True``
-    on first warm-up. ``None`` means we haven't probed yet (fresh process,
-    fake client, or warm_up hasn't happened). The UI reads this to surface a
-    CPU-mode slowdown warning to users.
+    every field is ``"ok"`` (spec §5). ``ollama`` can be ``"degraded"``
+    when the backend is reachable but the default model isn't pulled —
+    monitoring surfaces that as non-200.
     """
 
     model_config = ConfigDict(extra="forbid")
 
@ -44,7 +38,6 @@ class HealthStatus(BaseModel):
     postgres: Literal["ok", "fail"]
     ollama: Literal["ok", "degraded", "fail"]
     ocr: Literal["ok", "fail"]
-    ocr_gpu: bool | None = None
 
 
 class MetricsResponse(BaseModel):
@ -5,149 +5,21 @@ worker loop (Task 3.5) and the pg_queue listener (Task 3.6). Tests that
 don't care about the worker call ``create_app(spawn_worker=False)`` so the
 lifespan returns cleanly.
 
-Task 4.3 fills in the production wiring:
-
-* Factories (``make_genai_client`` / ``make_ocr_client``) pick between
-  fakes (``IX_TEST_MODE=fake``) and real Ollama/Surya clients.
-* ``/healthz`` probes call ``selfcheck()`` on the active clients. In
-  ``fake`` mode they always report ok.
-* The worker's :class:`Pipeline` is built once per spawn with the real
-  chain of Steps; each call to the injected ``pipeline_factory`` returns
-  a fresh Pipeline so per-request state stays isolated.
+The factory is parameterised (``spawn_worker``) instead of env-gated because
+pytest runs multiple app instances per session and we want the decision local
+to each call, not inferred from ``IX_*`` variables. The listener is also
+gated on ``spawn_worker``: the listener is only useful when a worker is
+draining the queue, so the two share one flag.
 """
 
 from __future__ import annotations
 
-import asyncio
-from collections.abc import AsyncIterator, Callable
+from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager, suppress
-from typing import Literal
 
 from fastapi import FastAPI
-from fastapi.staticfiles import StaticFiles
 
-from ix.adapters.rest.routes import Probes, get_probes
 from ix.adapters.rest.routes import router as rest_router
from ix.config import AppConfig, get_config
from ix.genai import make_genai_client
from ix.genai.client import GenAIClient
from ix.ocr import make_ocr_client
from ix.ocr.client import OCRClient
from ix.pipeline.genai_step import GenAIStep
from ix.pipeline.ocr_step import OCRStep
from ix.pipeline.pipeline import Pipeline
from ix.pipeline.reliability_step import ReliabilityStep
from ix.pipeline.response_handler_step import ResponseHandlerStep
from ix.pipeline.setup_step import SetupStep
from ix.ui import build_router as build_ui_router
from ix.ui.routes import STATIC_DIR as UI_STATIC_DIR
def build_pipeline(
genai: GenAIClient, ocr: OCRClient, cfg: AppConfig
) -> Pipeline:
"""Assemble the production :class:`Pipeline` with injected clients.
Kept as a module-level helper so tests that want to exercise the
production wiring (without running the worker) can call it directly.
"""
from pathlib import Path
from ix.ingestion import FetchConfig
return Pipeline(
steps=[
SetupStep(
tmp_dir=Path(cfg.tmp_dir),
fetch_config=FetchConfig(
connect_timeout_s=float(cfg.file_connect_timeout_seconds),
read_timeout_s=float(cfg.file_read_timeout_seconds),
max_bytes=cfg.file_max_bytes,
),
),
OCRStep(ocr_client=ocr),
GenAIStep(genai_client=genai),
ReliabilityStep(),
ResponseHandlerStep(),
]
)
def _make_ollama_probe(
genai: GenAIClient, cfg: AppConfig
) -> Callable[[], Literal["ok", "degraded", "fail"]]:
"""Adapter: async ``selfcheck`` → sync callable the route expects.
Always drives the coroutine on a throwaway event loop in a separate
thread. This keeps the behavior identical whether the caller holds an
event loop (FastAPI request) or doesn't (a CLI tool), and avoids the
``asyncio.run`` vs. already-running-loop footgun.
"""
def probe() -> Literal["ok", "degraded", "fail"]:
if not hasattr(genai, "selfcheck"):
return "ok" # fake client — nothing to probe.
return _run_async_sync(
lambda: genai.selfcheck(expected_model=cfg.default_model), # type: ignore[attr-defined]
fallback="fail",
)
return probe
def _make_ocr_probe(ocr: OCRClient) -> Callable[[], Literal["ok", "fail"]]:
def probe() -> Literal["ok", "fail"]:
if not hasattr(ocr, "selfcheck"):
return "ok" # fake — nothing to probe.
return _run_async_sync(
lambda: ocr.selfcheck(), # type: ignore[attr-defined]
fallback="fail",
)
return probe
def _make_ocr_gpu_probe(ocr: OCRClient) -> Callable[[], bool | None]:
"""Adapter: read the OCR client's recorded ``gpu_available`` attribute.
The attribute is set by :meth:`SuryaOCRClient.warm_up` on first load.
Returns ``None`` when the client has no such attribute (e.g. FakeOCRClient
in test mode) or warm_up hasn't happened yet. Never raises.
"""
def probe() -> bool | None:
return getattr(ocr, "gpu_available", None)
return probe
def _run_async_sync(make_coro, *, fallback: str) -> str: # type: ignore[no-untyped-def]
"""Run ``make_coro()`` on a fresh loop in a thread; return its result.
The thread owns its own event loop so the caller's loop (if any) keeps
running. Any exception collapses to ``fallback``.
"""
import threading
result: dict[str, object] = {}
def _runner() -> None:
loop = asyncio.new_event_loop()
try:
result["value"] = loop.run_until_complete(make_coro())
except Exception as exc: # any error collapses to fallback
result["error"] = exc
finally:
loop.close()
t = threading.Thread(target=_runner)
t.start()
t.join()
if "error" in result or "value" not in result:
return fallback
return str(result["value"])
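The `_run_async_sync` helper above exists to dodge the `asyncio.run`-inside-a-running-loop footgun. A condensed, runnable copy plus a demo of the case that motivates it — calling the sync probe from code that already holds an event loop:

```python
import asyncio
import threading


def run_async_sync(make_coro, *, fallback: str) -> str:
    """Condensed copy of _run_async_sync above, kept verbatim in spirit:
    drive the coroutine on a fresh loop in its own thread."""
    result: dict = {}

    def runner() -> None:
        loop = asyncio.new_event_loop()
        try:
            result["value"] = loop.run_until_complete(make_coro())
        except Exception as exc:  # any error collapses to fallback
            result["error"] = exc
        finally:
            loop.close()

    t = threading.Thread(target=runner)
    t.start()
    t.join()
    if "error" in result or "value" not in result:
        return fallback
    return str(result["value"])


async def caller_with_running_loop() -> str:
    # The point of the thread-per-call design: this works even though the
    # caller's own loop is running (plain asyncio.run() would raise here).
    async def probe() -> str:
        return "ok"

    return run_async_sync(probe, fallback="fail")
```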
 def create_app(*, spawn_worker: bool = True) -> FastAPI:
@ -164,36 +36,21 @@ def create_app(*, spawn_worker: bool = True) -> FastAPI:
 
     @asynccontextmanager
     async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
-        cfg = get_config()
-
-        # Build the clients once per process. The worker's pipeline
-        # factory closes over these so every job runs through the same
-        # Ollama/Surya instance (Surya's predictors are heavy; re-loading
-        # them per job would be catastrophic).
-        genai_client = make_genai_client(cfg)
-        ocr_client = make_ocr_client(cfg)
-
-        # Override the route-level probe DI so /healthz reflects the
-        # actual clients. Tests that want canned probes can still override
-        # ``get_probes`` at the TestClient layer.
-        _app.dependency_overrides.setdefault(
-            get_probes,
-            lambda: Probes(
-                ollama=_make_ollama_probe(genai_client, cfg),
-                ocr=_make_ocr_probe(ocr_client),
-                ocr_gpu=_make_ocr_gpu_probe(ocr_client),
-            ),
-        )
-
         worker_task = None
         listener = None
         if spawn_worker:
-            # Pipeline factory + listener wiring live in Chunk 4's
-            # production entrypoint; keeping this path best-effort lets the
-            # lifespan still start even on a box where Ollama/Surya aren't
-            # available (the listener just gives us a passive 10 s poll).
             try:
                 from ix.adapters.pg_queue.listener import (
                     PgQueueListener,
                     asyncpg_dsn_from_sqlalchemy_url,
                 )
+                from ix.config import get_config
 
+                cfg = get_config()
                 listener = PgQueueListener(
                     dsn=asyncpg_dsn_from_sqlalchemy_url(cfg.postgres_url)
                 )
@ -202,10 +59,10 @@
                 listener = None
 
             try:
-                worker_task = await _spawn_production_worker(
-                    cfg, genai_client, ocr_client, listener
-                )
-            except Exception:
+                from ix.worker.loop import spawn_worker_task
+
+                worker_task = await spawn_worker_task(_app)
+            except ImportError:
                 worker_task = None
 
         try:
             yield
@ -220,41 +77,4 @@
     app = FastAPI(lifespan=lifespan, title="infoxtractor", version="0.1.0")
     app.include_router(rest_router)
 
-    # Browser UI — additive, never touches the REST paths above.
-    app.include_router(build_ui_router())
-
-    # Static assets for the UI. CDN-only for MVP so the directory is
-    # essentially empty, but the mount must exist so relative asset
-    # URLs resolve cleanly.
-    app.mount(
-        "/ui/static",
-        StaticFiles(directory=str(UI_STATIC_DIR)),
-        name="ui-static",
-    )
-
     return app
async def _spawn_production_worker(
cfg: AppConfig,
genai: GenAIClient,
ocr: OCRClient,
listener, # type: ignore[no-untyped-def]
) -> asyncio.Task[None]:
"""Spawn the background worker with a production pipeline factory."""
from ix.store.engine import get_session_factory
from ix.worker.loop import Worker
def pipeline_factory() -> Pipeline:
return build_pipeline(genai, ocr, cfg)
worker = Worker(
session_factory=get_session_factory(),
pipeline_factory=pipeline_factory,
poll_interval_seconds=10.0,
max_running_seconds=2 * cfg.pipeline_request_timeout_seconds,
callback_timeout_seconds=cfg.callback_timeout_seconds,
wait_for_work=listener.wait_for_work if listener is not None else None,
)
stop = asyncio.Event()
return asyncio.create_task(worker.run(stop))
@ -12,7 +12,6 @@ re-read after ``monkeypatch.setenv``. Production code never clears the cache.
 
 from __future__ import annotations
 
 from functools import lru_cache
-from typing import Literal
 
 from pydantic_settings import BaseSettings, SettingsConfigDict
 
@ -33,17 +32,14 @@ class AppConfig(BaseSettings):
     )
 
     # --- Job store ---
-    # Defaults assume the ix container runs with `network_mode: host` and
-    # reaches the shared `postgis` and `ollama` containers on loopback;
-    # spec §11 / docker-compose.yml ship that configuration.
     postgres_url: str = (
         "postgresql+asyncpg://infoxtractor:<password>"
-        "@127.0.0.1:5431/infoxtractor"
+        "@host.docker.internal:5431/infoxtractor"
     )
 
     # --- LLM backend ---
-    ollama_url: str = "http://127.0.0.1:11434"
-    default_model: str = "qwen3:14b"
+    ollama_url: str = "http://host.docker.internal:11434"
+    default_model: str = "gpt-oss:20b"
 
     # --- OCR ---
     ocr_engine: str = "surya"
 
@ -66,13 +62,6 @@ class AppConfig(BaseSettings):
     # --- Observability ---
     log_level: str = "INFO"
 
-    # --- Test / wiring mode ---
-    # ``fake``: factories return FakeGenAIClient / FakeOCRClient and
-    # ``/healthz`` probes report ok. CI sets this so the Forgejo runner
-    # doesn't need access to Ollama or GPU-backed Surya. ``None`` (default)
-    # means production wiring: real OllamaClient + SuryaOCRClient.
-    test_mode: Literal["fake"] | None = None
 
 
 @lru_cache(maxsize=1)
 def get_config() -> AppConfig:
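The `IX_`-prefixed environment override behaviour that pydantic-settings provides in this module (and that `.env.example` relies on) can be illustrated with a stdlib stand-in — `AppConfigLite` and `load_config` are invented names for the sketch:

```python
import os
from dataclasses import dataclass, fields


@dataclass
class AppConfigLite:
    """Stdlib stand-in for AppConfig; the real class uses pydantic-settings."""
    ollama_url: str = "http://127.0.0.1:11434"
    default_model: str = "qwen3:14b"
    log_level: str = "INFO"


def load_config(environ=None, prefix: str = "IX_") -> AppConfigLite:
    """Each field can be overridden by <prefix><FIELD_NAME> in the environment,
    e.g. IX_DEFAULT_MODEL for default_model."""
    env = os.environ if environ is None else environ
    overrides = {}
    for f in fields(AppConfigLite):
        raw = env.get(prefix + f.name.upper())
        if raw is not None:
            overrides[f.name] = raw
    return AppConfigLite(**overrides)
```

The real module adds `@lru_cache` on top, which is why tests must clear the cache after `monkeypatch.setenv` — the sketch re-reads the environment on every call.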
@ -22,11 +22,6 @@ class FileRef(BaseModel):
     Used when the file URL needs authentication (e.g. Paperless ``Token``) or a
     tighter size cap than :envvar:`IX_FILE_MAX_BYTES`. Plain URLs that need no
     headers can stay as bare ``str`` values in :attr:`Context.files`.
-
-    ``display_name`` is pure UI metadata: the pipeline never consults it for
-    execution. When the UI uploads a PDF under a random ``{uuid}.pdf`` name on
-    disk, it stashes the client-provided filename here so the browser can
-    surface "your_statement.pdf" instead of "8f3a...pdf" back to the user.
     """
 
     model_config = ConfigDict(extra="forbid")
 
@ -34,7 +29,6 @@ class FileRef(BaseModel):
     url: str
     headers: dict[str, str] = Field(default_factory=dict)
     max_bytes: int | None = None
-    display_name: str | None = None
 
 
 class Context(BaseModel):
 
@ -89,44 +83,6 @@ class Options(BaseModel):
     provenance: ProvenanceOptions = Field(default_factory=ProvenanceOptions)
class UseCaseFieldDef(BaseModel):
"""One field in an ad-hoc, caller-defined extraction schema.
The UI (and any other caller that doesn't want to wait on a backend
registry entry) ships one of these per desired output field. The pipeline
builds a fresh Pydantic response class from the list on each request.
``choices`` only applies to ``type == "str"``: it turns the field into a
``Literal[*choices]``. For any other type the builder raises
``IX_001_001``.
"""
model_config = ConfigDict(extra="forbid")
name: str # must be a valid Python identifier
type: Literal["str", "int", "float", "decimal", "date", "datetime", "bool"]
required: bool = False
description: str | None = None
choices: list[str] | None = None
class InlineUseCase(BaseModel):
"""Caller-defined use case bundled into the :class:`RequestIX`.
When present on a request, the pipeline builds the ``(Request, Response)``
Pydantic class pair on the fly from :attr:`fields` and skips the
registered use-case lookup. The registry-based ``use_case`` field is still
required on the request for metrics/logging but becomes a free-form label.
"""
model_config = ConfigDict(extra="forbid")
use_case_name: str
system_prompt: str
default_model: str | None = None
fields: list[UseCaseFieldDef]
 class RequestIX(BaseModel):
     """Top-level job request.
 
@ -134,12 +90,6 @@ class RequestIX(BaseModel):
     it; the REST adapter / pg-queue adapter populates it on insert. The field
     is kept here so the contract is closed-over-construction round-trips
     (e.g. when the worker re-hydrates a job out of the store).
-
-    When ``use_case_inline`` is present, the pipeline uses it verbatim to
-    build an ad-hoc ``(Request, Response)`` class pair and skips the registry
-    lookup; ``use_case`` becomes a free-form label (still required for
-    metrics/logging). When ``use_case_inline`` is absent, ``use_case`` is
-    looked up in :data:`ix.use_cases.REGISTRY` as before.
     """
 
     model_config = ConfigDict(extra="forbid")
 
@ -151,4 +101,3 @@ class RequestIX(BaseModel):
     context: Context
     options: Options = Field(default_factory=Options)
     callback_url: str | None = None
-    use_case_inline: InlineUseCase | None = None
@ -1,43 +1,18 @@
 """GenAI subsystem: protocol + fake client + invocation-result dataclasses.
 
-Real backends (Ollama, ) plug in behind :class:`GenAIClient`. The factory
-:func:`make_genai_client` picks between :class:`FakeGenAIClient` (for CI
-/ hermetic tests via ``IX_TEST_MODE=fake``) and :class:`OllamaClient`
-(production). Tests that want a real Ollama client anyway can call the
-constructor directly.
+Real backends (Ollama, etc.) plug in behind :class:`GenAIClient`. The MVP
+ships only :class:`FakeGenAIClient` from this package; the real Ollama
+client lands in Chunk 4.
 """
 
 from __future__ import annotations
 
-from ix.config import AppConfig
 from ix.genai.client import GenAIClient, GenAIInvocationResult, GenAIUsage
 from ix.genai.fake import FakeGenAIClient
-from ix.genai.ollama_client import OllamaClient
-
-
-def make_genai_client(cfg: AppConfig) -> GenAIClient:
-    """Return the :class:`GenAIClient` configured for the current run.
-
-    When ``cfg.test_mode == "fake"`` the fake is returned; the pipeline
-    callers are expected to override the injected client via DI if they
-    want a non-default canned response. Otherwise a live
-    :class:`OllamaClient` bound to ``cfg.ollama_url`` and the per-call
-    timeout is returned.
-    """
-    if cfg.test_mode == "fake":
-        return FakeGenAIClient(parsed=None)
-    return OllamaClient(
-        base_url=cfg.ollama_url,
-        per_call_timeout_s=float(cfg.genai_call_timeout_seconds),
-    )
-
 
 __all__ = [
     "FakeGenAIClient",
     "GenAIClient",
     "GenAIInvocationResult",
     "GenAIUsage",
-    "OllamaClient",
-    "make_genai_client",
 ]
@ -36,7 +36,7 @@ class OllamaClient:
     Parameters
     ----------
     base_url:
-        Root URL of the Ollama server (e.g. ``http://127.0.0.1:11434``).
+        Root URL of the Ollama server (e.g. ``http://host.docker.internal:11434``).
         Trailing slashes are stripped.
     per_call_timeout_s:
         Hard per-call timeout for ``/api/chat``. Spec default: 1500 s.
 
@ -96,9 +96,8 @@ class OllamaClient:
             ) from exc
 
         content = (payload.get("message") or {}).get("content") or ""
-        json_blob = _extract_json_blob(content)
         try:
-            parsed = response_schema.model_validate_json(json_blob)
+            parsed = response_schema.model_validate_json(content)
         except ValidationError as exc:
             raise IXException(
                 IXErrorCode.IX_002_001,
 
@ -160,39 +159,16 @@ class OllamaClient:
         request_kwargs: dict[str, Any],
         response_schema: type[BaseModel],
     ) -> dict[str, Any]:
-        """Map provider-neutral kwargs to Ollama's /api/chat body.
-
-        Schema strategy for Ollama 0.11.8: we pass ``format="json"`` (loose
-        JSON mode) and bake the Pydantic schema into a system message
-        ahead of the caller's own system prompt. Rationale:
-
-        * The full Pydantic schema as ``format=<schema>`` crashes llama.cpp's
-          structured-output implementation (SIGSEGV) on every non-trivial
-          shape: ``anyOf`` / ``$ref`` / ``pattern`` all trigger it.
-        * ``format="json"`` alone guarantees valid JSON but not the shape;
-          models routinely return ``{}`` when not told what fields to emit.
-        * Injecting the schema into the prompt is the cheapest way to
-          get both: the model sees the expected shape explicitly, Pydantic
-          validates the response at parse time (IX_002_001 on mismatch).
-
-        Non-Ollama ``GenAIClient`` impls can ignore this behaviour and use
-        native structured-output (``response_format`` on OpenAI, etc.).
-        """
+        """Map provider-neutral kwargs to Ollama's /api/chat body."""
         messages = self._translate_messages(
             list(request_kwargs.get("messages") or [])
         )
-        messages = _inject_schema_system_message(messages, response_schema)
         body: dict[str, Any] = {
             "model": request_kwargs.get("model"),
             "messages": messages,
             "stream": False,
-            # NOTE: format is deliberately omitted. `format="json"` made
-            # reasoning models (qwen3) abort after emitting `{}` because the
-            # constrained sampler terminated before the chain-of-thought
-            # finished; `format=<schema>` segfaulted Ollama 0.11.8. Letting
-            # the model stream freely and then extracting the trailing JSON
-            # blob works for both reasoning and non-reasoning models.
+            "format": response_schema.model_json_schema(),
         }
 
         options: dict[str, Any] = {}
 
@ -224,117 +200,4 @@ class OllamaClient:
         return out
def _extract_json_blob(text: str) -> str:
"""Return the outermost balanced JSON object in ``text``.
Reasoning models (qwen3, deepseek-r1) wrap their real answer in
``<think></think>`` blocks. Other models sometimes prefix prose or
fence the JSON in ```json``` code blocks. Finding the last balanced
``{}`` is the cheapest robust parse that works for all three shapes;
a malformed response yields the full text and Pydantic catches it
downstream as ``IX_002_001``.
"""
start = text.find("{")
if start < 0:
return text
depth = 0
in_string = False
escaped = False
for i in range(start, len(text)):
ch = text[i]
if in_string:
if escaped:
escaped = False
elif ch == "\\":
escaped = True
elif ch == '"':
in_string = False
continue
if ch == '"':
in_string = True
elif ch == "{":
depth += 1
elif ch == "}":
depth -= 1
if depth == 0:
return text[start : i + 1]
return text[start:]
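The scan is easy to exercise in isolation. Below is a trimmed standalone copy of the same balanced-brace walk; the `raw` sample input is invented:

```python
import json

def extract_json_blob(text: str) -> str:
    # Walk from the first "{" tracking brace depth and string state;
    # return the balanced object, or the tail if braces never balance.
    start = text.find("{")
    if start < 0:
        return text
    depth, in_string, escaped = 0, False, False
    for i in range(start, len(text)):
        ch = text[i]
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start : i + 1]
    return text[start:]

raw = '<think>sum the items first...</think>\n{"total": "12.50", "currency": "EUR"}'
print(json.loads(extract_json_blob(raw))["total"])  # 12.50
```

A reply with no braces at all falls through unchanged and fails Pydantic validation downstream, which is exactly the IX_002_001 path described above.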
def _inject_schema_system_message(
messages: list[dict[str, Any]],
response_schema: type[BaseModel],
) -> list[dict[str, Any]]:
"""Prepend a system message that pins the expected JSON shape.
Ollama's ``format="json"`` mode guarantees valid JSON but not the
field set or names. We emit the Pydantic schema as JSON and
instruct the model to match it. If the caller already provides a
system message, we prepend ours; otherwise ours becomes the first
system turn.
"""
import json as _json
schema_json = _json.dumps(
_sanitise_schema_for_ollama(response_schema.model_json_schema()),
indent=2,
)
guidance = (
"Respond ONLY with a single JSON object matching this JSON Schema "
"exactly. No prose, no code fences, no explanations. All top-level "
"properties listed in `required` MUST be present. Use null for "
"fields you cannot confidently extract. The JSON Schema:\n"
f"{schema_json}"
)
return [{"role": "system", "content": guidance}, *messages]
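The injected turn looks like this. The schema dict below is written out by hand as a stand-in for a ``model_json_schema()`` dump, so the sketch stays dependency-free:

```python
import json

# Hand-written stand-in for response_schema.model_json_schema().
schema = {
    "type": "object",
    "properties": {
        "total": {"type": "number"},
        "currency": {"type": "string"},
    },
    "required": ["total", "currency"],
}

guidance = (
    "Respond ONLY with a single JSON object matching this JSON Schema "
    "exactly. No prose, no code fences, no explanations. All top-level "
    "properties listed in `required` MUST be present. Use null for "
    "fields you cannot confidently extract. The JSON Schema:\n"
    f"{json.dumps(schema, indent=2)}"
)

# The guidance always becomes the first system turn.
messages = [{"role": "system", "content": guidance},
            {"role": "user", "content": "Invoice: total 12.50 EUR"}]
print(messages[0]["role"])  # system
```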
def _sanitise_schema_for_ollama(schema: Any) -> Any:
"""Strip null branches from ``anyOf`` unions.
Ollama 0.11.8's llama.cpp structured-output implementation segfaults on
Pydantic v2's standard Optional pattern::
{"anyOf": [{"type": "string"}, {"type": "null"}]}
We collapse any ``anyOf`` that includes a ``{"type": "null"}`` entry to
its non-null branches: a single branch becomes that branch inline; multiple
branches keep the union without null. This only narrows what the LLM is
*told* it may emit; Pydantic still validates the real response and can
accept ``None`` at parse time if the field is ``Optional``.
Walk is recursive and structure-preserving. Other ``anyOf`` shapes (e.g.
polymorphic unions without null) are left alone.
"""
if isinstance(schema, dict):
cleaned: dict[str, Any] = {}
for key, value in schema.items():
if key == "anyOf" and isinstance(value, list):
non_null = [
_sanitise_schema_for_ollama(branch)
for branch in value
if not (isinstance(branch, dict) and branch.get("type") == "null")
]
if len(non_null) == 1:
# Inline the single remaining branch; merge its keys into the
# parent so siblings like ``default``/``title`` are preserved.
only = non_null[0]
if isinstance(only, dict):
for ok, ov in only.items():
cleaned.setdefault(ok, ov)
else:
cleaned[key] = non_null
elif len(non_null) == 0:
# Pathological: nothing left. Fall back to a permissive type.
cleaned["type"] = "string"
else:
cleaned[key] = non_null
else:
cleaned[key] = _sanitise_schema_for_ollama(value)
return cleaned
if isinstance(schema, list):
return [_sanitise_schema_for_ollama(item) for item in schema]
return schema
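The collapse is easier to see on a concrete schema. A compact standalone restatement of the same walk (sample schemas invented):

```python
def strip_null_anyof(schema):
    # Recursively collapse anyOf unions that include {"type": "null"}.
    if isinstance(schema, list):
        return [strip_null_anyof(s) for s in schema]
    if not isinstance(schema, dict):
        return schema
    out = {}
    for key, value in schema.items():
        if key == "anyOf" and isinstance(value, list):
            non_null = [strip_null_anyof(b) for b in value
                        if not (isinstance(b, dict) and b.get("type") == "null")]
            if len(non_null) == 1 and isinstance(non_null[0], dict):
                # Inline the single surviving branch next to siblings.
                for k, v in non_null[0].items():
                    out.setdefault(k, v)
            elif non_null:
                out[key] = non_null
            else:
                out["type"] = "string"  # pathological: nothing left
        else:
            out[key] = strip_null_anyof(value)
    return out

before = {"anyOf": [{"type": "string"}, {"type": "null"}], "default": None}
print(strip_null_anyof(before))  # {'type': 'string', 'default': None}
```

Note the LLM only loses the *permission* to emit null; Pydantic still accepts ``None`` for ``Optional`` fields at parse time.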
__all__ = ["OllamaClient"] __all__ = ["OllamaClient"]
@@ -1,34 +1,13 @@
"""OCR subsystem: protocol + fake + real Surya client + factory. """OCR subsystem: protocol + fake client.
Real engines (Surya today, Azure DI / AWS Textract deferred) plug in Real engines (Surya, Azure DI, ) plug in behind :class:`OCRClient`. The
behind :class:`OCRClient`. The factory :func:`make_ocr_client` picks MVP ships only :class:`FakeOCRClient` from this package; the real Surya
between :class:`FakeOCRClient` (when ``IX_TEST_MODE=fake``) and client lands in Chunk 4.
:class:`SuryaOCRClient` (production). Unknown engine names raise so a
typo'd ``IX_OCR_ENGINE`` surfaces at startup, not later.
""" """
from __future__ import annotations from __future__ import annotations
from ix.config import AppConfig
from ix.contracts.response import OCRDetails, OCRResult
from ix.ocr.client import OCRClient from ix.ocr.client import OCRClient
from ix.ocr.fake import FakeOCRClient from ix.ocr.fake import FakeOCRClient
from ix.ocr.surya_client import SuryaOCRClient
__all__ = ["FakeOCRClient", "OCRClient"]
def make_ocr_client(cfg: AppConfig) -> OCRClient:
"""Return the :class:`OCRClient` configured for the current run."""
if cfg.test_mode == "fake":
return FakeOCRClient(canned=OCRResult(result=OCRDetails()))
if cfg.ocr_engine == "surya":
return SuryaOCRClient()
raise ValueError(f"Unknown ocr_engine: {cfg.ocr_engine!r}")
__all__ = [
"FakeOCRClient",
"OCRClient",
"SuryaOCRClient",
"make_ocr_client",
]
@@ -5,19 +5,11 @@ method satisfies the Protocol. :class:`~ix.pipeline.ocr_step.OCRStep`
depends on the Protocol, not a concrete class, so swapping engines depends on the Protocol, not a concrete class, so swapping engines
(``FakeOCRClient`` in tests, ``SuryaOCRClient`` in prod) stays a wiring (``FakeOCRClient`` in tests, ``SuryaOCRClient`` in prod) stays a wiring
change at the app factory. change at the app factory.
Per-page source location (``files`` + ``page_metadata``) flows in as
optional kwargs: fakes ignore them; the real
:class:`~ix.ocr.surya_client.SuryaOCRClient` uses them to render each
page's pixels back from disk. Keeping these optional lets unit tests stay
pages-only while production wiring (Task 4.3) plumbs through the real
filesystem handles.
""" """
from __future__ import annotations from __future__ import annotations
from pathlib import Path from typing import Protocol, runtime_checkable
from typing import Any, Protocol, runtime_checkable
from ix.contracts import OCRResult, Page from ix.contracts import OCRResult, Page
@@ -32,18 +24,8 @@ class OCRClient(Protocol):
per input page (in the same order). per input page (in the same order).
""" """
async def ocr( async def ocr(self, pages: list[Page]) -> OCRResult:
self, """Run OCR over the input pages; return the structured result."""
pages: list[Page],
*,
files: list[tuple[Path, str]] | None = None,
page_metadata: list[Any] | None = None,
) -> OCRResult:
"""Run OCR over the input pages; return the structured result.
``files`` and ``page_metadata`` are optional for hermetic tests;
real engines that need to re-render from disk read them.
"""
... ...
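Structural typing is what lets the fake and the Surya client swap freely: a conforming class needs no inheritance. A minimal sketch (``OCRClientProto`` and ``NullOCR`` are invented stand-ins):

```python
from typing import Any, Protocol, runtime_checkable

@runtime_checkable
class OCRClientProto(Protocol):
    async def ocr(self, pages: list, **kwargs: Any) -> dict: ...

class NullOCR:
    # No base class: conformance is purely structural.
    async def ocr(self, pages: list, **kwargs: Any) -> dict:
        return {"pages": pages}

print(isinstance(NullOCR(), OCRClientProto))  # True
```

``runtime_checkable`` only checks method presence, not signatures, so the real guarantee still comes from the type checker at the app-factory wiring site.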
@@ -30,17 +30,8 @@ class FakeOCRClient:
self._canned = canned self._canned = canned
self._raise_on_call = raise_on_call self._raise_on_call = raise_on_call
async def ocr( async def ocr(self, pages: list[Page]) -> OCRResult:
self, """Return the canned result or raise the configured error."""
pages: list[Page],
**_kwargs: object,
) -> OCRResult:
"""Return the canned result or raise the configured error.
Accepts (and ignores) any keyword args the production Protocol may
carry; this keeps the fake swappable for :class:`SuryaOCRClient` at
call sites that pass ``files`` / ``page_metadata``.
"""
if self._raise_on_call is not None: if self._raise_on_call is not None:
raise self._raise_on_call raise self._raise_on_call
return self._canned return self._canned
@@ -1,252 +0,0 @@
"""SuryaOCRClient — real :class:`OCRClient` backed by ``surya-ocr``.
Per spec §6.2: the MVP OCR engine. Runs Surya's detection + recognition
predictors over per-page PIL images rendered from the downloaded sources
(PDFs via PyMuPDF, images via Pillow).
Design choices:
* **Lazy model loading.** ``__init__`` is cheap; the heavy predictors are
built on first :meth:`ocr` / :meth:`selfcheck` / explicit :meth:`warm_up`.
This keeps FastAPI's lifespan predictable — ops can decide whether to
pay the load cost up front or on first request.
* **Device is Surya's default.** CUDA on the prod box, MPS on M-series Macs.
We deliberately don't pin.
* **No text-token reuse from PyMuPDF.** The cross-check against Paperless'
Tesseract output (ReliabilityStep's ``text_agreement``) is only meaningful
with a truly independent OCR pass, so we always render-and-recognize
even for PDFs that carry embedded text.
The ``surya-ocr`` package pulls torch + heavy model deps, so it's kept
behind the ``[ocr]`` extra. All Surya imports are deferred into
:meth:`warm_up` so running the unit tests (which patch the predictors)
doesn't require the package to be installed.
"""
from __future__ import annotations
import asyncio
import contextlib
from pathlib import Path
from typing import TYPE_CHECKING, Any, Literal
from ix.contracts import Line, OCRDetails, OCRResult, Page
from ix.segmentation import PageMetadata
if TYPE_CHECKING: # pragma: no cover
from PIL import Image as PILImage
class SuryaOCRClient:
"""Surya-backed OCR engine.
Attributes are created lazily by :meth:`warm_up`. The unit tests inject
mocks directly onto ``_recognition_predictor`` / ``_detection_predictor``
to avoid the Surya import chain.
"""
def __init__(self) -> None:
self._recognition_predictor: Any = None
self._detection_predictor: Any = None
# ``None`` until warm_up() has run at least once. After that it's the
# observed value of ``torch.cuda.is_available()`` at load time. We
# cache it on the instance so ``/healthz`` / the UI can surface a
# CPU-mode warning without re-probing torch each request.
self.gpu_available: bool | None = None
def warm_up(self) -> None:
"""Load the detection + recognition predictors. Idempotent.
Called automatically on the first :meth:`ocr` / :meth:`selfcheck`,
or explicitly from the app lifespan to front-load the cost.
"""
if (
self._recognition_predictor is not None
and self._detection_predictor is not None
):
return
# Deferred imports: only reachable when the optional [ocr] extra is
# installed. Keeping them inside the method so base-install unit
# tests (which patch the predictors) don't need surya on sys.path.
from surya.detection import DetectionPredictor # type: ignore[import-not-found]
from surya.foundation import FoundationPredictor # type: ignore[import-not-found]
from surya.recognition import RecognitionPredictor # type: ignore[import-not-found]
foundation = FoundationPredictor()
self._recognition_predictor = RecognitionPredictor(foundation)
self._detection_predictor = DetectionPredictor()
# Best-effort CUDA probe — only after predictors loaded cleanly so we
# know torch is fully importable. ``torch`` is a Surya transitive
# dependency so if we got this far it's on sys.path. We swallow any
# exception to keep warm_up() sturdy: the attribute stays None and the
# UI falls back to "unknown" gracefully.
try:
import torch # type: ignore[import-not-found]
self.gpu_available = bool(torch.cuda.is_available())
except Exception:
self.gpu_available = None
async def ocr(
self,
pages: list[Page],
*,
files: list[tuple[Path, str]] | None = None,
page_metadata: list[Any] | None = None,
) -> OCRResult:
"""Render each input page, run Surya, translate back to contracts."""
self.warm_up()
images = self._render_pages(pages, files, page_metadata)
# Surya is blocking — run it off the event loop.
loop = asyncio.get_running_loop()
surya_results = await loop.run_in_executor(
None, self._run_recognition, images
)
out_pages: list[Page] = []
all_text_fragments: list[str] = []
for input_page, surya_result in zip(pages, surya_results, strict=True):
lines: list[Line] = []
for tl in getattr(surya_result, "text_lines", []) or []:
flat = self._flatten_polygon(getattr(tl, "polygon", None))
text = getattr(tl, "text", None)
lines.append(Line(text=text, bounding_box=flat))
if text:
all_text_fragments.append(text)
out_pages.append(
Page(
page_no=input_page.page_no,
width=input_page.width,
height=input_page.height,
angle=input_page.angle,
unit=input_page.unit,
lines=lines,
)
)
details = OCRDetails(
text="\n".join(all_text_fragments) if all_text_fragments else None,
pages=out_pages,
)
return OCRResult(result=details, meta_data={"engine": "surya"})
async def selfcheck(self) -> Literal["ok", "fail"]:
"""Run the predictors on a 1x1 image to confirm the stack works."""
try:
self.warm_up()
except Exception:
return "fail"
try:
from PIL import Image as PILImageRuntime
img = PILImageRuntime.new("RGB", (1, 1), color="white")
loop = asyncio.get_running_loop()
await loop.run_in_executor(None, self._run_recognition, [img])
except Exception:
return "fail"
return "ok"
def _run_recognition(self, images: list[PILImage.Image]) -> list[Any]:
"""Invoke the recognition predictor. Kept tiny for threadpool offload."""
return list(
self._recognition_predictor(
images, det_predictor=self._detection_predictor
)
)
def _render_pages(
self,
pages: list[Page],
files: list[tuple[Path, str]] | None,
page_metadata: list[Any] | None,
) -> list[PILImage.Image]:
"""Render each input :class:`Page` to a PIL image.
We walk pages + page_metadata in lockstep so we know which source
file each page came from and (for PDFs) what page-index to render.
Text-only pages (``file_index is None``) get a blank 1x1 placeholder
so Surya returns an empty result and downstream code still gets one
entry per input page.
"""
from PIL import Image as PILImageRuntime
metas: list[PageMetadata] = list(page_metadata or [])
file_records: list[tuple[Path, str]] = list(files or [])
# Per-file lazy PDF openers so we don't re-open across pages.
pdf_docs: dict[int, Any] = {}
# Per-file running page-within-file counter. For PDFs we emit one
# entry per PDF page in order; ``pages`` was built the same way by
# DocumentIngestor, so a parallel counter reconstructs the mapping.
per_file_cursor: dict[int, int] = {}
rendered: list[PILImage.Image] = []
try:
for idx, _page in enumerate(pages):
meta = metas[idx] if idx < len(metas) else PageMetadata()
file_index = meta.file_index
if file_index is None or file_index >= len(file_records):
# Text-only page — placeholder image; Surya returns empty.
rendered.append(
PILImageRuntime.new("RGB", (1, 1), color="white")
)
continue
local_path, mime = file_records[file_index]
if mime == "application/pdf":
doc = pdf_docs.get(file_index)
if doc is None:
import fitz # PyMuPDF
doc = fitz.open(str(local_path))
pdf_docs[file_index] = doc
pdf_page_no = per_file_cursor.get(file_index, 0)
per_file_cursor[file_index] = pdf_page_no + 1
pdf_page = doc.load_page(pdf_page_no)
pix = pdf_page.get_pixmap(dpi=200)
img = PILImageRuntime.frombytes(
"RGB", (pix.width, pix.height), pix.samples
)
rendered.append(img)
elif mime in ("image/png", "image/jpeg", "image/tiff"):
frame_no = per_file_cursor.get(file_index, 0)
per_file_cursor[file_index] = frame_no + 1
img = PILImageRuntime.open(local_path)
# Handle multi-frame (TIFF) — seek to the right frame.
with contextlib.suppress(EOFError):
img.seek(frame_no)
rendered.append(img.convert("RGB"))
else: # pragma: no cover - ingestor already rejected
rendered.append(
PILImageRuntime.new("RGB", (1, 1), color="white")
)
finally:
for doc in pdf_docs.values():
with contextlib.suppress(Exception):
doc.close()
return rendered
@staticmethod
def _flatten_polygon(polygon: Any) -> list[float]:
"""Flatten ``[[x1,y1],[x2,y2],[x3,y3],[x4,y4]]`` → 8-float list.
Surya emits 4 quad corners. The spec wants 8 raw-pixel coords so
downstream provenance normalisation can consume them directly.
"""
if not polygon:
return []
flat: list[float] = []
for point in polygon:
if isinstance(point, (list, tuple)) and len(point) >= 2:
flat.append(float(point[0]))
flat.append(float(point[1]))
return flat
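The corner flattening is simple enough to sanity-check standalone (the sample quad is invented):

```python
def flatten_polygon(polygon):
    # [[x1, y1], ..., [x4, y4]] -> [x1, y1, ..., x4, y4] as floats.
    flat = []
    for point in polygon or []:
        if isinstance(point, (list, tuple)) and len(point) >= 2:
            flat.extend((float(point[0]), float(point[1])))
    return flat

quad = [[10, 20], [110, 20], [110, 60], [10, 60]]
print(flatten_polygon(quad))
# [10.0, 20.0, 110.0, 20.0, 110.0, 60.0, 10.0, 60.0]
```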
__all__ = ["SuryaOCRClient"]
@@ -56,11 +56,7 @@ class OCRStep(Step):
assert ctx is not None, "SetupStep must populate response_ix.context" assert ctx is not None, "SetupStep must populate response_ix.context"
pages = list(getattr(ctx, "pages", [])) pages = list(getattr(ctx, "pages", []))
files = list(getattr(ctx, "files", []) or []) ocr_result = await self._client.ocr(pages)
page_metadata = list(getattr(ctx, "page_metadata", []) or [])
ocr_result = await self._client.ocr(
pages, files=files, page_metadata=page_metadata
)
# Inject page tags around each OCR page's content so the LLM can # Inject page tags around each OCR page's content so the LLM can
# cross-reference the visual anchor without a separate prompt hack. # cross-reference the visual anchor without a separate prompt hack.
@@ -34,7 +34,6 @@ from ix.ingestion import (
) )
from ix.pipeline.step import Step from ix.pipeline.step import Step
from ix.use_cases import get_use_case from ix.use_cases import get_use_case
from ix.use_cases.inline import build_use_case_classes
class _Fetcher(Protocol): class _Fetcher(Protocol):
@@ -89,18 +88,9 @@ class SetupStep(Step):
async def process( async def process(
self, request_ix: RequestIX, response_ix: ResponseIX self, request_ix: RequestIX, response_ix: ResponseIX
) -> ResponseIX: ) -> ResponseIX:
# 1. Load the use-case pair — either from the caller's inline # 1. Load the use-case pair — early so an unknown name fails before
# definition (wins over registry) or from the registry by name. # we waste time downloading files.
# Done early so an unknown name / bad inline definition fails use_case_request_cls, use_case_response_cls = get_use_case(request_ix.use_case)
# before we waste time downloading files.
if request_ix.use_case_inline is not None:
use_case_request_cls, use_case_response_cls = build_use_case_classes(
request_ix.use_case_inline
)
else:
use_case_request_cls, use_case_response_cls = get_use_case(
request_ix.use_case
)
use_case_request = use_case_request_cls() use_case_request = use_case_request_cls()

@@ -157,76 +157,6 @@ async def get(session: AsyncSession, job_id: UUID) -> Job | None:
return _orm_to_job(row) if row is not None else None return _orm_to_job(row) if row is not None else None
async def queue_position(
session: AsyncSession, job_id: UUID
) -> tuple[int, int]:
"""Return ``(ahead, total_active)`` for a pending/running job.
``ahead`` counts active jobs (``pending`` or ``running``) that would be
claimed by the worker before this one:
* any ``running`` job is always ahead it has the worker already.
* other ``pending`` jobs with a strictly older ``created_at`` are ahead
(the worker picks pending rows in ``ORDER BY created_at`` per
:func:`claim_next_pending`).
``total_active`` is the total count of ``pending`` + ``running`` rows.
Terminal jobs (``done`` / ``error``) always return ``(0, 0)``: there is
no meaningful "position" for a finished job.
"""
row = await session.scalar(select(IxJob).where(IxJob.job_id == job_id))
if row is None:
return (0, 0)
if row.status not in ("pending", "running"):
return (0, 0)
total_active = int(
await session.scalar(
select(func.count())
.select_from(IxJob)
.where(IxJob.status.in_(("pending", "running")))
)
or 0
)
if row.status == "running":
# A running row is at the head of the queue for our purposes.
return (0, total_active)
# Pending: count running rows (always ahead) + older pending rows.
# We tiebreak on ``job_id`` for deterministic ordering when multiple
# rows share a ``created_at`` (e.g. the same transaction inserts two
# jobs, which Postgres stamps with identical ``now()`` values).
running_ahead = int(
await session.scalar(
select(func.count())
.select_from(IxJob)
.where(IxJob.status == "running")
)
or 0
)
pending_ahead = int(
await session.scalar(
select(func.count())
.select_from(IxJob)
.where(
IxJob.status == "pending",
(
(IxJob.created_at < row.created_at)
| (
(IxJob.created_at == row.created_at)
& (IxJob.job_id < row.job_id)
)
),
)
)
or 0
)
return (running_ahead + pending_ahead, total_active)
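The counting rule can be sketched over an in-memory job list; statuses, ids, and timestamps below are invented, and the real code issues the equivalent SQL counts instead:

```python
from datetime import datetime, timedelta

def queue_position(jobs, job_id):
    # jobs: list of (job_id, status, created_at) tuples.
    row = next((j for j in jobs if j[0] == job_id), None)
    if row is None or row[1] not in ("pending", "running"):
        return (0, 0)
    active = [j for j in jobs if j[1] in ("pending", "running")]
    if row[1] == "running":
        return (0, len(active))
    # Running rows are always ahead; pending rows are ahead when their
    # (created_at, job_id) sorts strictly before ours (the tiebreak).
    ahead = sum(
        1 for j in active
        if j[1] == "running"
        or (j[1] == "pending" and (j[2], j[0]) < (row[2], row[0]))
    )
    return (ahead, len(active))

t0 = datetime(2026, 4, 18, 12, 0)
jobs = [
    ("a", "running", t0),
    ("b", "pending", t0 + timedelta(seconds=1)),
    ("c", "pending", t0 + timedelta(seconds=2)),
    ("d", "done", t0),
]
print(queue_position(jobs, "c"))  # (2, 3): the running job plus one older pending
```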
async def get_by_correlation( async def get_by_correlation(
session: AsyncSession, client_id: str, request_id: str session: AsyncSession, client_id: str, request_id: str
) -> Job | None: ) -> Job | None:
@@ -1,13 +0,0 @@
"""Minimal browser UI served alongside the REST API at ``/ui``.
The module is intentionally thin: templates + HTMX + Pico CSS (all from
CDNs, no build step). Uploads land in ``{cfg.tmp_dir}/ui/<uuid>.pdf`` and
are submitted through the same :func:`ix.store.jobs_repo.insert_pending`
entry point the REST adapter uses; the UI does not duplicate that logic.
"""
from __future__ import annotations
from ix.ui.routes import build_router
__all__ = ["build_router"]
@@ -1,428 +0,0 @@
"""``/ui`` router — thin HTML wrapper over the existing jobs pipeline.
Design notes:
* Uploads stream to ``{cfg.tmp_dir}/ui/{uuid4()}.pdf`` via aiofiles; the
file persists for the lifetime of the ``ix_id`` (no cleanup cron; spec
deferred).
* The submission handler builds a :class:`RequestIX` (inline use case
supported) and inserts it via the same
:func:`ix.store.jobs_repo.insert_pending` the REST adapter uses.
* Responses are HTML. For HTMX-triggered submissions the handler returns
``HX-Redirect`` so the whole page swaps; for plain form posts it returns
a 303 redirect.
* The fragment endpoint powers the polling loop: while the job is
pending/running, the fragment auto-refreshes every 2s via
``hx-trigger="every 2s"``; when terminal, the trigger is dropped and the
pretty-printed response is rendered with highlight.js.
* A process-wide 60-second cache of the OCR GPU flag (read from the
injected :class:`Probes`) gates a "Surya is running on CPU" notice on
the fragment. The fragment is polled every 2 s; re-probing the OCR
client on every poll is waste one probe per minute is plenty.
"""
from __future__ import annotations
import json
import time
import uuid
from pathlib import Path
from typing import Annotated
from urllib.parse import unquote, urlsplit
from uuid import UUID
import aiofiles
from fastapi import (
APIRouter,
Depends,
File,
Form,
HTTPException,
Request,
UploadFile,
)
from fastapi.responses import HTMLResponse, RedirectResponse, Response
from fastapi.templating import Jinja2Templates
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
from ix.adapters.rest.routes import Probes, get_probes, get_session_factory_dep
from ix.config import AppConfig, get_config
from ix.contracts.request import (
Context,
FileRef,
GenAIOptions,
InlineUseCase,
OCROptions,
Options,
ProvenanceOptions,
RequestIX,
UseCaseFieldDef,
)
from ix.store import jobs_repo
from ix.use_cases import REGISTRY
TEMPLATES_DIR = Path(__file__).parent / "templates"
STATIC_DIR = Path(__file__).parent / "static"
# Module-level cache for the OCR GPU flag. The tuple is ``(value, expires_at)``
# where ``expires_at`` is a monotonic-clock deadline. A per-request call to
# :func:`_cached_ocr_gpu` re-probes only once the deadline has passed.
_OCR_GPU_CACHE: tuple[bool | None, float] = (None, 0.0)
_OCR_GPU_TTL_SECONDS = 60.0
def _templates() -> Jinja2Templates:
"""One Jinja env per process; cheap enough to build per DI call."""
return Jinja2Templates(directory=str(TEMPLATES_DIR))
def _ui_tmp_dir(cfg: AppConfig) -> Path:
"""Where uploads land. Created on first use; never cleaned up."""
d = Path(cfg.tmp_dir) / "ui"
d.mkdir(parents=True, exist_ok=True)
return d
def _cached_ocr_gpu(probes: Probes) -> bool | None:
"""Read the cached OCR GPU flag, re-probing if the TTL has elapsed.
Used by the index + fragment routes so the HTMX poll loop doesn't hit
the OCR client's torch-probe every 2 seconds. Falls back to ``None``
(unknown) on any probe error.
"""
global _OCR_GPU_CACHE
value, expires_at = _OCR_GPU_CACHE
now = time.monotonic()
if now >= expires_at:
try:
value = probes.ocr_gpu()
except Exception:
value = None
_OCR_GPU_CACHE = (value, now + _OCR_GPU_TTL_SECONDS)
return value
def _file_display_entries(
request: RequestIX | None,
) -> list[str]:
"""Human-readable filename(s) for a request's context.files.
Prefers :attr:`FileRef.display_name`. Falls back to the URL's basename
(``unquote``ed so ``%20`` → space). Plain string entries use the same
basename rule. Empty list for a request with no files.
"""
if request is None:
return []
out: list[str] = []
for entry in request.context.files:
if isinstance(entry, FileRef):
if entry.display_name:
out.append(entry.display_name)
continue
url = entry.url
else:
url = entry
basename = unquote(urlsplit(url).path.rsplit("/", 1)[-1]) or url
out.append(basename)
return out
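The basename fallback in isolation (the sample URL is invented):

```python
from urllib.parse import unquote, urlsplit

def display_name(url: str) -> str:
    # Last path segment, percent-decoded; fall back to the raw URL
    # when the path has no usable final segment.
    return unquote(urlsplit(url).path.rsplit("/", 1)[-1]) or url

print(display_name("https://host/docs/My%20Invoice.pdf"))  # My Invoice.pdf
print(display_name("https://host/"))                       # https://host/
```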
def build_router() -> APIRouter:
"""Return a fresh router. Kept as a factory so :mod:`ix.app` can wire DI."""
router = APIRouter(prefix="/ui", tags=["ui"])
@router.get("", response_class=HTMLResponse)
@router.get("/", response_class=HTMLResponse)
async def index(
request: Request,
probes: Annotated[Probes, Depends(get_probes)],
) -> Response:
tpl = _templates()
return tpl.TemplateResponse(
request,
"index.html",
{
"registered_use_cases": sorted(REGISTRY.keys()),
"job": None,
"form_error": None,
"form_values": {},
"file_names": [],
"cpu_mode": _cached_ocr_gpu(probes) is False,
},
)
@router.get("/jobs/{job_id}", response_class=HTMLResponse)
async def job_page(
request: Request,
job_id: UUID,
session_factory: Annotated[
async_sessionmaker[AsyncSession], Depends(get_session_factory_dep)
],
probes: Annotated[Probes, Depends(get_probes)],
) -> Response:
async with session_factory() as session:
job = await jobs_repo.get(session, job_id)
if job is None:
raise HTTPException(status_code=404, detail="job not found")
tpl = _templates()
return tpl.TemplateResponse(
request,
"index.html",
{
"registered_use_cases": sorted(REGISTRY.keys()),
"job": job,
"form_error": None,
"form_values": {},
"file_names": _file_display_entries(job.request),
"cpu_mode": _cached_ocr_gpu(probes) is False,
},
)
@router.get("/jobs/{job_id}/fragment", response_class=HTMLResponse)
async def job_fragment(
request: Request,
job_id: UUID,
session_factory: Annotated[
async_sessionmaker[AsyncSession], Depends(get_session_factory_dep)
],
probes: Annotated[Probes, Depends(get_probes)],
) -> Response:
async with session_factory() as session:
job = await jobs_repo.get(session, job_id)
if job is None:
raise HTTPException(status_code=404, detail="job not found")
ahead, total_active = await jobs_repo.queue_position(
session, job_id
)
response_json: str | None = None
if job.response is not None:
response_json = json.dumps(
job.response.model_dump(mode="json"),
indent=2,
sort_keys=True,
default=str,
)
elapsed_text = _format_elapsed(job)
file_names = _file_display_entries(job.request)
tpl = _templates()
return tpl.TemplateResponse(
request,
"job_fragment.html",
{
"job": job,
"response_json": response_json,
"ahead": ahead,
"total_active": total_active,
"elapsed_text": elapsed_text,
"file_names": file_names,
"cpu_mode": _cached_ocr_gpu(probes) is False,
},
)
@router.post("/jobs")
async def submit_job(
request: Request,
session_factory: Annotated[
async_sessionmaker[AsyncSession], Depends(get_session_factory_dep)
],
pdf: Annotated[UploadFile, File()],
use_case_name: Annotated[str, Form()],
use_case_mode: Annotated[str, Form()] = "registered",
texts: Annotated[str, Form()] = "",
ix_client_id: Annotated[str, Form()] = "ui",
request_id: Annotated[str, Form()] = "",
system_prompt: Annotated[str, Form()] = "",
default_model: Annotated[str, Form()] = "",
fields_json: Annotated[str, Form()] = "",
use_ocr: Annotated[str, Form()] = "",
ocr_only: Annotated[str, Form()] = "",
include_ocr_text: Annotated[str, Form()] = "",
include_geometries: Annotated[str, Form()] = "",
gen_ai_model_name: Annotated[str, Form()] = "",
include_provenance: Annotated[str, Form()] = "",
max_sources_per_field: Annotated[str, Form()] = "10",
) -> Response:
cfg = get_config()
form_values = {
"use_case_mode": use_case_mode,
"use_case_name": use_case_name,
"ix_client_id": ix_client_id,
"request_id": request_id,
"texts": texts,
"system_prompt": system_prompt,
"default_model": default_model,
"fields_json": fields_json,
"use_ocr": use_ocr,
"ocr_only": ocr_only,
"include_ocr_text": include_ocr_text,
"include_geometries": include_geometries,
"gen_ai_model_name": gen_ai_model_name,
"include_provenance": include_provenance,
"max_sources_per_field": max_sources_per_field,
}
def _rerender(error: str, status: int = 200) -> Response:
tpl = _templates()
return tpl.TemplateResponse(
request,
"index.html",
{
"registered_use_cases": sorted(REGISTRY.keys()),
"job": None,
"form_error": error,
"form_values": form_values,
},
status_code=status,
)
# --- Inline use case (optional) ---
inline: InlineUseCase | None = None
if use_case_mode == "custom":
try:
raw_fields = json.loads(fields_json)
except json.JSONDecodeError as exc:
return _rerender(f"Invalid fields JSON: {exc}", status=422)
if not isinstance(raw_fields, list):
return _rerender(
"Invalid fields JSON: must be a list of field objects",
status=422,
)
try:
parsed = [UseCaseFieldDef.model_validate(f) for f in raw_fields]
inline = InlineUseCase(
use_case_name=use_case_name,
system_prompt=system_prompt,
default_model=default_model or None,
fields=parsed,
)
except Exception as exc: # pydantic ValidationError or similar
return _rerender(
f"Invalid inline use-case definition: {exc}",
status=422,
)
# --- PDF upload ---
upload_dir = _ui_tmp_dir(cfg)
target = upload_dir / f"{uuid.uuid4().hex}.pdf"
# Stream copy with a size cap matching IX_FILE_MAX_BYTES.
total = 0
limit = cfg.file_max_bytes
async with aiofiles.open(target, "wb") as out:
while True:
chunk = await pdf.read(64 * 1024)
if not chunk:
break
total += len(chunk)
if total > limit:
# Drop the partial file; no stored state.
from contextlib import suppress
with suppress(FileNotFoundError):
target.unlink()
return _rerender(
f"PDF exceeds IX_FILE_MAX_BYTES ({limit} bytes)",
status=413,
)
await out.write(chunk)
# --- Build RequestIX ---
ctx_texts: list[str] = []
if texts.strip():
ctx_texts = [texts.strip()]
req_id = request_id.strip() or uuid.uuid4().hex
# Preserve the client-provided filename so the UI can surface the
# original name to the user (the on-disk name is a UUID). Strip any
# path prefix a browser included.
original_name = (pdf.filename or "").rsplit("/", 1)[-1].rsplit(
"\\", 1
)[-1] or None
try:
request_ix = RequestIX(
use_case=use_case_name or "adhoc",
use_case_inline=inline,
ix_client_id=(ix_client_id.strip() or "ui"),
request_id=req_id,
context=Context(
files=[
FileRef(
url=f"file://{target.resolve()}",
display_name=original_name,
)
],
texts=ctx_texts,
),
options=Options(
ocr=OCROptions(
use_ocr=_flag(use_ocr, default=True),
ocr_only=_flag(ocr_only, default=False),
include_ocr_text=_flag(include_ocr_text, default=False),
include_geometries=_flag(include_geometries, default=False),
),
gen_ai=GenAIOptions(
gen_ai_model_name=(gen_ai_model_name.strip() or None),
),
provenance=ProvenanceOptions(
include_provenance=_flag(include_provenance, default=True),
max_sources_per_field=int(max_sources_per_field or 10),
),
),
)
except Exception as exc:
return _rerender(f"Invalid request: {exc}", status=422)
async with session_factory() as session:
job = await jobs_repo.insert_pending(
session, request_ix, callback_url=None
)
await session.commit()
redirect_to = f"/ui/jobs/{job.job_id}"
if request.headers.get("HX-Request", "").lower() == "true":
return Response(status_code=200, headers={"HX-Redirect": redirect_to})
return RedirectResponse(url=redirect_to, status_code=303)
return router
def _flag(value: str, *, default: bool) -> bool:
"""HTML forms omit unchecked checkboxes. Treat absence as ``default``."""
if value == "":
return default
return value.lower() in ("on", "true", "1", "yes")
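The checkbox semantics in two lines (standalone copy of the same rule):

```python
def flag(value: str, *, default: bool) -> bool:
    # "" means the checkbox was absent from the POST body entirely.
    if value == "":
        return default
    return value.lower() in ("on", "true", "1", "yes")

print(flag("", default=True), flag("on", default=False), flag("off", default=True))
# True True False
```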
def _format_elapsed(job) -> str | None: # type: ignore[no-untyped-def]
"""Render a ``MM:SS`` elapsed string for the fragment template.
* running → time since ``started_at``
* done/error → ``finished_at - created_at`` (total wall-clock including
queue time)
* pending / missing timestamps → ``None`` (template omits the line)
"""
from datetime import UTC, datetime
def _fmt(seconds: float) -> str:
s = max(0, int(seconds))
return f"{s // 60:02d}:{s % 60:02d}"
if job.status == "running" and job.started_at is not None:
now = datetime.now(UTC)
return _fmt((now - job.started_at).total_seconds())
if (
job.status in ("done", "error")
and job.finished_at is not None
and job.created_at is not None
):
return _fmt((job.finished_at - job.created_at).total_seconds())
return None
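The inner formatter is worth a quick check, including the clamp for negative deltas (clock skew):

```python
def fmt(seconds: float) -> str:
    # Clamp negatives and render MM:SS.
    s = max(0, int(seconds))
    return f"{s // 60:02d}:{s % 60:02d}"

print(fmt(125.7), fmt(-3))  # 02:05 00:00
```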
@@ -1,241 +0,0 @@
<!doctype html>
<html lang="en" data-theme="light">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>
InfoXtractor{% if job %} &mdash; job {{ job.job_id }}{% endif %}
</title>
<link
rel="stylesheet"
href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css"
/>
<link
rel="stylesheet"
href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/styles/atom-one-light.min.css"
/>
<script src="https://unpkg.com/htmx.org@1.9.12"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/highlight.min.js"></script>
<style>
main { padding-top: 1.5rem; padding-bottom: 4rem; }
pre code.hljs { padding: 1rem; border-radius: 0.4rem; }
.form-error { color: var(--pico-del-color, #c44); font-weight: 600; }
details[open] > summary { margin-bottom: 0.5rem; }
.field-hint { font-size: 0.85rem; color: var(--pico-muted-color); }
nav.ix-header {
display: flex; gap: 1rem; align-items: baseline;
padding: 0.6rem 0; border-bottom: 1px solid var(--pico-muted-border-color, #ddd);
margin-bottom: 1rem; flex-wrap: wrap;
}
nav.ix-header .brand { font-weight: 700; margin-right: auto; }
nav.ix-header code { font-size: 0.9em; }
.status-panel, .result-panel { margin-top: 0.75rem; }
.status-panel header, .result-panel header { font-size: 0.95rem; }
.job-files code { font-size: 0.9em; }
.cpu-notice { margin-top: 0.6rem; font-size: 0.9rem; color: var(--pico-muted-color); }
.live-dot {
display: inline-block; margin-left: 0.3rem;
animation: ix-blink 1.2s ease-in-out infinite;
color: var(--pico-primary, #4f8cc9);
}
@keyframes ix-blink {
0%, 100% { opacity: 0.2; }
50% { opacity: 1; }
}
.copy-btn {
margin-left: 0.3rem; padding: 0.1rem 0.5rem;
font-size: 0.8rem; line-height: 1.2;
}
</style>
</head>
<body>
<main class="container">
<nav class="ix-header" aria-label="InfoXtractor navigation">
<span class="brand">InfoXtractor</span>
<a href="/ui">Upload a new extraction</a>
{% if job %}
<span>
Job:
<code id="current-job-id">{{ job.job_id }}</code>
<button
type="button"
class="secondary outline copy-btn"
onclick="navigator.clipboard && navigator.clipboard.writeText('{{ job.job_id }}')"
aria-label="Copy job id to clipboard"
>Copy</button>
</span>
{% endif %}
</nav>
<hgroup>
<h1>infoxtractor</h1>
<p>Drop a PDF, pick or define a use case, run the pipeline.</p>
</hgroup>
{% if form_error %}
<article class="form-error">
<p><strong>Form error:</strong> {{ form_error }}</p>
</article>
{% endif %}
{% if not job %}
<article>
<form
action="/ui/jobs"
method="post"
enctype="multipart/form-data"
hx-post="/ui/jobs"
hx-encoding="multipart/form-data"
>
<label>
PDF file
<input type="file" name="pdf" accept="application/pdf" required />
</label>
<label>
Extra texts (optional, e.g. Paperless OCR output)
<textarea
name="texts"
rows="3"
placeholder="Plain text passed as context.texts[0]"
>{{ form_values.get("texts", "") }}</textarea>
<small class="field-hint">Whatever you type is submitted as a single entry in <code>context.texts</code>.</small>
</label>
<fieldset>
<legend>Use case</legend>
<label>
<input
type="radio"
name="use_case_mode"
value="registered"
{% if form_values.get("use_case_mode", "registered") == "registered" %}checked{% endif %}
onchange="document.getElementById('custom-fields').hidden = true"
/>
Registered
</label>
<label>
<input
type="radio"
name="use_case_mode"
value="custom"
{% if form_values.get("use_case_mode") == "custom" %}checked{% endif %}
onchange="document.getElementById('custom-fields').hidden = false"
/>
Custom (inline)
</label>
<label>
Use case name
<input
type="text"
name="use_case_name"
list="registered-use-cases"
value="{{ form_values.get('use_case_name', 'bank_statement_header') }}"
required
/>
<datalist id="registered-use-cases">
{% for name in registered_use_cases %}
<option value="{{ name }}"></option>
{% endfor %}
</datalist>
</label>
<div id="custom-fields" {% if form_values.get("use_case_mode") != "custom" %}hidden{% endif %}>
<label>
System prompt
<textarea name="system_prompt" rows="3">{{ form_values.get("system_prompt", "") }}</textarea>
</label>
<label>
Default model (optional)
<input
type="text"
name="default_model"
value="{{ form_values.get('default_model', '') }}"
placeholder="qwen3:14b"
/>
</label>
<label>
Fields (JSON list of {name, type, required?, choices?, description?})
<textarea name="fields_json" rows="6" placeholder='[{"name": "vendor", "type": "str", "required": true}]'>{{ form_values.get("fields_json", "") }}</textarea>
<small class="field-hint">Types: str, int, float, decimal, date, datetime, bool. <code>choices</code> works on <code>str</code> only.</small>
</label>
</div>
</fieldset>
<details>
<summary>Advanced options</summary>
<label>
Client id
<input type="text" name="ix_client_id" value="{{ form_values.get('ix_client_id', 'ui') }}" />
</label>
<label>
Request id (blank → random)
<input type="text" name="request_id" value="{{ form_values.get('request_id', '') }}" />
</label>
<fieldset>
<legend>OCR</legend>
<label><input type="checkbox" name="use_ocr" {% if form_values.get("use_ocr", "on") %}checked{% endif %} /> use_ocr</label>
<label><input type="checkbox" name="ocr_only" {% if form_values.get("ocr_only") %}checked{% endif %} /> ocr_only</label>
<label><input type="checkbox" name="include_ocr_text" {% if form_values.get("include_ocr_text") %}checked{% endif %} /> include_ocr_text</label>
<label><input type="checkbox" name="include_geometries" {% if form_values.get("include_geometries") %}checked{% endif %} /> include_geometries</label>
</fieldset>
<label>
GenAI model override (optional)
<input type="text" name="gen_ai_model_name" value="{{ form_values.get('gen_ai_model_name', '') }}" />
</label>
<fieldset>
<legend>Provenance</legend>
<label><input type="checkbox" name="include_provenance" {% if form_values.get("include_provenance", "on") %}checked{% endif %} /> include_provenance</label>
<label>
max_sources_per_field
<input type="number" name="max_sources_per_field" min="1" max="100" value="{{ form_values.get('max_sources_per_field', '10') }}" />
</label>
</fieldset>
</details>
<button type="submit">Submit</button>
</form>
</article>
{% endif %}
{% if job %}
<article id="job-panel">
<header>
<strong>Job</strong> <code>{{ job.job_id }}</code>
<br /><small>ix_id: <code>{{ job.ix_id }}</code></small>
{% if file_names %}
<br /><small>
File{% if file_names|length > 1 %}s{% endif %}:
{% for name in file_names %}
<code>{{ name }}</code>{% if not loop.last %}, {% endif %}
{% endfor %}
</small>
{% endif %}
</header>
<div
id="job-status"
hx-get="/ui/jobs/{{ job.job_id }}/fragment"
hx-trigger="load"
hx-swap="innerHTML"
>
Loading&hellip;
</div>
</article>
{% endif %}
</main>
<script>
document.body.addEventListener("htmx:afterSettle", () => {
if (window.hljs) {
document.querySelectorAll("pre code").forEach((el) => {
try { hljs.highlightElement(el); } catch (_) { /* noop */ }
});
}
});
</script>
</body>
</html>

View file

@@ -1,77 +0,0 @@
{#- HTMX fragment rendered into #job-status on the results panel.
Pending/running → keep polling every 2s; terminal → render JSON. -#}
{% set terminal = job.status in ("done", "error") %}
<div
id="job-fragment"
{% if not terminal %}
hx-get="/ui/jobs/{{ job.job_id }}/fragment"
hx-trigger="every 2s"
hx-swap="outerHTML"
{% endif %}
>
<article class="status-panel">
<header>
<strong>Job status</strong>
</header>
<p>
Status:
<strong>{{ job.status }}</strong>
{% if not terminal %}
<span class="live-dot" aria-hidden="true">&#9679;</span>
{% endif %}
</p>
{% if file_names %}
<p class="job-files">
File{% if file_names|length > 1 %}s{% endif %}:
{% for name in file_names %}
<code>{{ name }}</code>{% if not loop.last %}, {% endif %}
{% endfor %}
</p>
{% endif %}
{% if job.status == "pending" %}
<p>
{% if ahead == 0 %}
About to start &mdash; the worker just freed up.
{% else %}
Queue position: {{ ahead }} ahead &mdash; {{ total_active }} job{% if total_active != 1 %}s{% endif %} total in flight (single worker).
{% endif %}
</p>
<progress></progress>
{% elif job.status == "running" %}
{% if elapsed_text %}
<p>Running for {{ elapsed_text }}.</p>
{% endif %}
<progress></progress>
{% elif terminal %}
{% if elapsed_text %}
<p>Finished in {{ elapsed_text }}.</p>
{% endif %}
{% endif %}
{% if cpu_mode and not terminal %}
<details class="cpu-notice">
<summary>Surya is running on CPU (~1&ndash;2 min/page)</summary>
<p>
A host NVIDIA driver upgrade would unlock GPU extraction; tracked in
<code>docs/deployment.md</code>.
</p>
</details>
{% endif %}
</article>
<article class="result-panel">
<header>
<strong>Result</strong>
</header>
{% if terminal and response_json %}
<pre><code class="language-json">{{ response_json }}</code></pre>
{% elif terminal %}
<p><em>No response body.</em></p>
{% else %}
<p><em>Waiting for the pipeline to finish&hellip;</em></p>
{% endif %}
</article>
</div>

View file

@@ -26,7 +26,7 @@ class Request(BaseModel):
model_config = ConfigDict(extra="forbid") model_config = ConfigDict(extra="forbid")
use_case_name: str = "Bank Statement Header" use_case_name: str = "Bank Statement Header"
default_model: str = "qwen3:14b" default_model: str = "gpt-oss:20b"
system_prompt: str = ( system_prompt: str = (
"You extract header metadata from a single bank or credit-card statement. " "You extract header metadata from a single bank or credit-card statement. "
"Return only facts that appear in the document; leave a field null if uncertain. " "Return only facts that appear in the document; leave a field null if uncertain. "

View file

@@ -1,132 +0,0 @@
"""Dynamic Pydantic class builder for caller-supplied use cases.
Input: an :class:`ix.contracts.request.InlineUseCase` carried on the
:class:`~ix.contracts.request.RequestIX`.
Output: a fresh ``(RequestClass, ResponseClass)`` pair with the same shape
as a registered use case. The :class:`~ix.pipeline.setup_step.SetupStep`
calls this when ``request_ix.use_case_inline`` is set, bypassing the
registry lookup entirely.
The builder returns brand-new classes on every call, so it is safe to call
per request: two concurrent jobs can't step on each other's schemas even if
they happen to share a ``use_case_name``. Validation errors map to
``IX_001_001`` (same code the registry-miss path uses); the error is
recoverable from the caller's perspective (fix the JSON and retry), not an
infra problem.
"""
from __future__ import annotations
import keyword
import re
from datetime import date, datetime
from decimal import Decimal
from typing import Any, Literal, cast
from pydantic import BaseModel, ConfigDict, Field, create_model
from ix.contracts.request import InlineUseCase, UseCaseFieldDef
from ix.errors import IXErrorCode, IXException
# Map the ``UseCaseFieldDef.type`` literal to concrete Python types.
_TYPE_MAP: dict[str, type] = {
"str": str,
"int": int,
"float": float,
"decimal": Decimal,
"date": date,
"datetime": datetime,
"bool": bool,
}
def _fail(detail: str) -> IXException:
return IXException(IXErrorCode.IX_001_001, detail=detail)
def _valid_field_name(name: str) -> bool:
"""Require a valid Python identifier that isn't a reserved keyword."""
return name.isidentifier() and not keyword.iskeyword(name)
def _resolve_field_type(field: UseCaseFieldDef) -> Any:
"""Return the annotation for a single field, with ``choices`` honoured."""
base = _TYPE_MAP[field.type]
if field.choices:
if field.type != "str":
raise _fail(
f"field {field.name!r}: 'choices' is only allowed for "
f"type='str' (got {field.type!r})"
)
return Literal[tuple(field.choices)] # type: ignore[valid-type]
return base
def _sanitise_class_name(raw: str) -> str:
"""``re.sub(r"\\W", "_", name)`` + ``Inline_`` prefix.
Keeps the generated class name debuggable (shows up in repr / tracebacks)
while ensuring it's always a valid Python identifier.
"""
return "Inline_" + re.sub(r"\W", "_", raw)
def build_use_case_classes(
inline: InlineUseCase,
) -> tuple[type[BaseModel], type[BaseModel]]:
"""Build a fresh ``(RequestClass, ResponseClass)`` from ``inline``.
* Every call returns new classes. The caller may cache if desired; the
pipeline intentionally does not.
* Raises :class:`~ix.errors.IXException` with code
:attr:`~ix.errors.IXErrorCode.IX_001_001` on any structural problem
(empty fields, bad name, dup name, bad ``choices``).
"""
if not inline.fields:
raise _fail("inline use case must define at least one field")
seen: set[str] = set()
for fd in inline.fields:
if not _valid_field_name(fd.name):
raise _fail(f"field name {fd.name!r} is not a valid Python identifier")
if fd.name in seen:
raise _fail(f"duplicate field name {fd.name!r}")
seen.add(fd.name)
response_fields: dict[str, Any] = {}
for fd in inline.fields:
annotation = _resolve_field_type(fd)
field_info = Field(
...,
description=fd.description,
) if fd.required else Field(
default=None,
description=fd.description,
)
if not fd.required:
annotation = annotation | None
response_fields[fd.name] = (annotation, field_info)
response_cls = create_model( # type: ignore[call-overload]
_sanitise_class_name(inline.use_case_name),
__config__=ConfigDict(extra="forbid"),
**response_fields,
)
request_cls = create_model( # type: ignore[call-overload]
"Inline_Request_" + re.sub(r"\W", "_", inline.use_case_name),
__config__=ConfigDict(extra="forbid"),
use_case_name=(str, inline.use_case_name),
system_prompt=(str, inline.system_prompt),
default_model=(str | None, inline.default_model),
)
return cast(type[BaseModel], request_cls), cast(type[BaseModel], response_cls)
__all__ = ["build_use_case_classes"]

View file

@@ -28,6 +28,8 @@ from collections.abc import Callable from collections.abc import Callable
from datetime import UTC, datetime from datetime import UTC, datetime
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from fastapi import FastAPI
from ix.contracts.response import ResponseIX from ix.contracts.response import ResponseIX
from ix.errors import IXErrorCode, IXException from ix.errors import IXErrorCode, IXException
from ix.pipeline.pipeline import Pipeline from ix.pipeline.pipeline import Pipeline
@@ -177,3 +179,17 @@ class Worker:
await session.commit() await session.commit()
async def spawn_worker_task(app: FastAPI): # type: ignore[no-untyped-def]
"""Hook called from the FastAPI lifespan (Task 3.4).
This module-level async function is here so ``ix.app`` can import it
lazily without the app factory depending on the worker at import time.
Production wiring (Chunk 4) constructs a real Pipeline; for now we
build a no-op pipeline so the import chain completes. Tests that need
the worker wire their own Worker explicitly.
"""
# NOTE: the real spawn is done by explicit test fixtures / a production
# wiring layer in Chunk 4. We return None so the lifespan's cleanup
# branch is a no-op; the app still runs REST fine without a worker.
return None

View file

@@ -341,117 +341,6 @@ async def test_sweep_orphans_leaves_fresh_running_alone(
assert after.status == "running" assert after.status == "running"
async def test_queue_position_pending_only(
session_factory: async_sessionmaker[AsyncSession],
) -> None:
"""Three pending rows in insertion order → positions 0, 1, 2; total 3.
Each row is committed in its own transaction so the DB stamps a
distinct ``created_at`` per row (``now()`` is transaction-stable).
"""
async with session_factory() as session:
a = await jobs_repo.insert_pending(
session, _make_request("c", "qp-a"), callback_url=None
)
await session.commit()
async with session_factory() as session:
b = await jobs_repo.insert_pending(
session, _make_request("c", "qp-b"), callback_url=None
)
await session.commit()
async with session_factory() as session:
c = await jobs_repo.insert_pending(
session, _make_request("c", "qp-c"), callback_url=None
)
await session.commit()
async with session_factory() as session:
pa = await jobs_repo.queue_position(session, a.job_id)
pb = await jobs_repo.queue_position(session, b.job_id)
pc = await jobs_repo.queue_position(session, c.job_id)
# All three active; total == 3.
assert pa == (0, 3)
assert pb == (1, 3)
assert pc == (2, 3)
async def test_queue_position_running_plus_pending(
session_factory: async_sessionmaker[AsyncSession],
) -> None:
"""One running + two pending → running:(0,3), next:(1,3), last:(2,3)."""
async with session_factory() as session:
first = await jobs_repo.insert_pending(
session, _make_request("c", "qp-r-1"), callback_url=None
)
await session.commit()
async with session_factory() as session:
second = await jobs_repo.insert_pending(
session, _make_request("c", "qp-r-2"), callback_url=None
)
await session.commit()
async with session_factory() as session:
third = await jobs_repo.insert_pending(
session, _make_request("c", "qp-r-3"), callback_url=None
)
await session.commit()
# Claim the first → it becomes running.
async with session_factory() as session:
claimed = await jobs_repo.claim_next_pending(session)
await session.commit()
assert claimed is not None
assert claimed.job_id == first.job_id
async with session_factory() as session:
p_running = await jobs_repo.queue_position(session, first.job_id)
p_second = await jobs_repo.queue_position(session, second.job_id)
p_third = await jobs_repo.queue_position(session, third.job_id)
# Running row reports 0 ahead (itself is the head).
assert p_running == (0, 3)
# Second pending: running is ahead (1) + zero older pendings.
assert p_second == (1, 3)
# Third pending: running ahead + one older pending.
assert p_third == (2, 3)
async def test_queue_position_terminal_returns_zero_zero(
session_factory: async_sessionmaker[AsyncSession],
) -> None:
"""Finished jobs have no queue position — always (0, 0)."""
async with session_factory() as session:
inserted = await jobs_repo.insert_pending(
session, _make_request("c", "qp-term"), callback_url=None
)
await session.commit()
response = ResponseIX(
use_case="bank_statement_header",
ix_client_id="c",
request_id="qp-term",
)
async with session_factory() as session:
await jobs_repo.mark_done(session, inserted.job_id, response)
await session.commit()
async with session_factory() as session:
pos = await jobs_repo.queue_position(session, inserted.job_id)
assert pos == (0, 0)
async def test_queue_position_unknown_id_returns_zero_zero(
session_factory: async_sessionmaker[AsyncSession],
) -> None:
async with session_factory() as session:
pos = await jobs_repo.queue_position(session, uuid4())
assert pos == (0, 0)
async def test_concurrent_claim_never_double_dispatches( async def test_concurrent_claim_never_double_dispatches(
session_factory: async_sessionmaker[AsyncSession], session_factory: async_sessionmaker[AsyncSession],
) -> None: ) -> None:

View file

@@ -1,537 +0,0 @@
"""Integration tests for the `/ui` router (spec §PR 2).
Covers the full round-trip through `POST /ui/jobs` the handler parses
multipart form data into a `RequestIX` and hands it to
`ix.store.jobs_repo.insert_pending`, the same entry point the REST adapter
uses. Tests assert the job row exists with the right client/request ids and
that custom-use-case forms produce a `use_case_inline` block in the stored
request JSON.
The DB-touching tests depend on the shared integration conftest, which
spins up migrations against the configured Postgres; the pure-template
tests (`GET /ui` and the fragment renderer) still need a factory but
won't actually query — they're cheap.
"""
from __future__ import annotations
import json
from collections.abc import Iterator
from pathlib import Path
from uuid import UUID, uuid4
import pytest
from fastapi.testclient import TestClient
from sqlalchemy import select
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
from ix.adapters.rest.routes import Probes, get_probes, get_session_factory_dep
from ix.app import create_app
from ix.store.models import IxJob
FIXTURE_DIR = Path(__file__).resolve().parents[1] / "fixtures"
FIXTURE_PDF = FIXTURE_DIR / "synthetic_giro.pdf"
def _factory_for_url(postgres_url: str): # type: ignore[no-untyped-def]
def _factory(): # type: ignore[no-untyped-def]
eng = create_async_engine(postgres_url, pool_pre_ping=True)
return async_sessionmaker(eng, expire_on_commit=False)
return _factory
@pytest.fixture
def app(postgres_url: str) -> Iterator[TestClient]:
app_obj = create_app(spawn_worker=False)
app_obj.dependency_overrides[get_session_factory_dep] = _factory_for_url(
postgres_url
)
app_obj.dependency_overrides[get_probes] = lambda: Probes(
ollama=lambda: "ok", ocr=lambda: "ok"
)
with TestClient(app_obj) as client:
yield client
class TestIndexPage:
def test_index_returns_html(self, app: TestClient) -> None:
resp = app.get("/ui")
assert resp.status_code == 200
assert "text/html" in resp.headers["content-type"]
body = resp.text
# Dropdown prefilled with the registered use case.
assert "bank_statement_header" in body
# Marker for the submission form.
assert '<form' in body
def test_static_mount_is_reachable(self, app: TestClient) -> None:
        # The mount itself must exist so asset URLs resolve; we probe the
        # committed .gitkeep file rather than the bare directory root.
resp = app.get("/ui/static/.gitkeep")
# .gitkeep exists in the repo — expect 200 (or at minimum not a 404
# due to missing mount). A 405/403 would also indicate the mount is
# wired; we assert the response is *not* a 404 from a missing route.
assert resp.status_code != 404
class TestSubmitJobRegistered:
def test_post_registered_use_case_creates_row(
self,
app: TestClient,
postgres_url: str,
) -> None:
request_id = f"ui-reg-{uuid4().hex[:8]}"
with FIXTURE_PDF.open("rb") as fh:
resp = app.post(
"/ui/jobs",
data={
"use_case_mode": "registered",
"use_case_name": "bank_statement_header",
"ix_client_id": "ui-test",
"request_id": request_id,
"texts": "",
"use_ocr": "on",
"include_provenance": "on",
"max_sources_per_field": "10",
},
files={"pdf": ("sample.pdf", fh, "application/pdf")},
follow_redirects=False,
)
assert resp.status_code in (200, 303), resp.text
# Assert the row exists in the DB.
job_row = _find_job(postgres_url, "ui-test", request_id)
assert job_row is not None
assert job_row.status == "pending"
assert job_row.request["use_case"] == "bank_statement_header"
# Context.files must reference a local file:// path.
files = job_row.request["context"]["files"]
assert len(files) == 1
entry = files[0]
url = entry if isinstance(entry, str) else entry["url"]
assert url.startswith("file://")
def test_htmx_submit_uses_hx_redirect_header(
self,
app: TestClient,
) -> None:
request_id = f"ui-htmx-{uuid4().hex[:8]}"
with FIXTURE_PDF.open("rb") as fh:
resp = app.post(
"/ui/jobs",
data={
"use_case_mode": "registered",
"use_case_name": "bank_statement_header",
"ix_client_id": "ui-test",
"request_id": request_id,
},
files={"pdf": ("sample.pdf", fh, "application/pdf")},
headers={"HX-Request": "true"},
follow_redirects=False,
)
assert resp.status_code == 200
assert "HX-Redirect" in resp.headers
class TestSubmitJobCustom:
def test_post_custom_use_case_stores_inline(
self,
app: TestClient,
postgres_url: str,
) -> None:
request_id = f"ui-cust-{uuid4().hex[:8]}"
fields_json = json.dumps(
[
{"name": "vendor", "type": "str", "required": True},
{"name": "total", "type": "decimal"},
]
)
with FIXTURE_PDF.open("rb") as fh:
resp = app.post(
"/ui/jobs",
data={
"use_case_mode": "custom",
"use_case_name": "invoice_adhoc",
"ix_client_id": "ui-test",
"request_id": request_id,
"system_prompt": "Extract vendor and total.",
"default_model": "qwen3:14b",
"fields_json": fields_json,
},
files={"pdf": ("sample.pdf", fh, "application/pdf")},
follow_redirects=False,
)
assert resp.status_code in (200, 303), resp.text
job_row = _find_job(postgres_url, "ui-test", request_id)
assert job_row is not None
stored = job_row.request["use_case_inline"]
assert stored is not None
assert stored["use_case_name"] == "invoice_adhoc"
assert stored["system_prompt"] == "Extract vendor and total."
names = [f["name"] for f in stored["fields"]]
assert names == ["vendor", "total"]
def test_post_malformed_fields_json_rejected(
self,
app: TestClient,
postgres_url: str,
) -> None:
request_id = f"ui-bad-{uuid4().hex[:8]}"
with FIXTURE_PDF.open("rb") as fh:
resp = app.post(
"/ui/jobs",
data={
"use_case_mode": "custom",
"use_case_name": "adhoc_bad",
"ix_client_id": "ui-test",
"request_id": request_id,
"system_prompt": "p",
"fields_json": "this is not json",
},
files={"pdf": ("sample.pdf", fh, "application/pdf")},
follow_redirects=False,
)
# Either re-rendered form (422 / 200 with error) — what matters is
# that no row was inserted.
assert resp.status_code in (200, 400, 422)
job_row = _find_job(postgres_url, "ui-test", request_id)
assert job_row is None
# A helpful error should appear somewhere in the body.
assert (
"error" in resp.text.lower()
or "invalid" in resp.text.lower()
or "json" in resp.text.lower()
)
class TestDisplayName:
def test_post_persists_display_name_in_file_ref(
self,
app: TestClient,
postgres_url: str,
) -> None:
"""The client-provided upload filename lands in FileRef.display_name."""
request_id = f"ui-name-{uuid4().hex[:8]}"
with FIXTURE_PDF.open("rb") as fh:
resp = app.post(
"/ui/jobs",
data={
"use_case_mode": "registered",
"use_case_name": "bank_statement_header",
"ix_client_id": "ui-test",
"request_id": request_id,
},
files={
"pdf": ("my statement.pdf", fh, "application/pdf")
},
follow_redirects=False,
)
assert resp.status_code in (200, 303), resp.text
job_row = _find_job(postgres_url, "ui-test", request_id)
assert job_row is not None
entry = job_row.request["context"]["files"][0]
assert isinstance(entry, dict)
assert entry["display_name"] == "my statement.pdf"
class TestFragment:
def test_fragment_pending_has_trigger(
self,
app: TestClient,
postgres_url: str,
) -> None:
request_id = f"ui-frag-p-{uuid4().hex[:8]}"
with FIXTURE_PDF.open("rb") as fh:
app.post(
"/ui/jobs",
data={
"use_case_mode": "registered",
"use_case_name": "bank_statement_header",
"ix_client_id": "ui-test",
"request_id": request_id,
},
files={"pdf": ("sample.pdf", fh, "application/pdf")},
follow_redirects=False,
)
job_row = _find_job(postgres_url, "ui-test", request_id)
assert job_row is not None
resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
assert resp.status_code == 200
body = resp.text
# Pending → auto-refresh every 2s.
assert "hx-trigger" in body
assert "2s" in body
assert "pending" in body.lower() or "running" in body.lower()
# New queue-awareness copy.
assert "Queue position" in body or "About to start" in body
def test_fragment_pending_shows_filename(
self,
app: TestClient,
postgres_url: str,
) -> None:
request_id = f"ui-frag-pf-{uuid4().hex[:8]}"
with FIXTURE_PDF.open("rb") as fh:
app.post(
"/ui/jobs",
data={
"use_case_mode": "registered",
"use_case_name": "bank_statement_header",
"ix_client_id": "ui-test",
"request_id": request_id,
},
files={
"pdf": (
"client-side-name.pdf",
fh,
"application/pdf",
)
},
follow_redirects=False,
)
job_row = _find_job(postgres_url, "ui-test", request_id)
assert job_row is not None
resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
assert resp.status_code == 200
assert "client-side-name.pdf" in resp.text
def test_fragment_running_shows_elapsed(
self,
app: TestClient,
postgres_url: str,
) -> None:
"""After flipping a row to running with a backdated started_at, the
fragment renders a ``Running for MM:SS`` line."""
request_id = f"ui-frag-r-{uuid4().hex[:8]}"
with FIXTURE_PDF.open("rb") as fh:
app.post(
"/ui/jobs",
data={
"use_case_mode": "registered",
"use_case_name": "bank_statement_header",
"ix_client_id": "ui-test",
"request_id": request_id,
},
files={"pdf": ("sample.pdf", fh, "application/pdf")},
follow_redirects=False,
)
job_row = _find_job(postgres_url, "ui-test", request_id)
assert job_row is not None
_force_running(postgres_url, job_row.job_id)
resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
assert resp.status_code == 200
body = resp.text
assert "Running for" in body
# MM:SS; our backdate is ~10s so expect 00:1? or higher.
import re
assert re.search(r"\d{2}:\d{2}", body), body
def test_fragment_backward_compat_no_display_name(
self,
app: TestClient,
postgres_url: str,
) -> None:
"""Older rows (stored before display_name existed) must still render."""
from ix.contracts.request import Context, FileRef, RequestIX
legacy_req = RequestIX(
use_case="bank_statement_header",
ix_client_id="ui-test",
request_id=f"ui-legacy-{uuid4().hex[:8]}",
context=Context(
files=[
FileRef(url="file:///tmp/ix/ui/legacy.pdf")
]
),
)
import asyncio
from ix.store import jobs_repo as _repo
async def _insert() -> UUID:
eng = create_async_engine(postgres_url)
sf = async_sessionmaker(eng, expire_on_commit=False)
try:
async with sf() as session:
job = await _repo.insert_pending(
session, legacy_req, callback_url=None
)
await session.commit()
return job.job_id
finally:
await eng.dispose()
job_id = asyncio.run(_insert())
resp = app.get(f"/ui/jobs/{job_id}/fragment")
assert resp.status_code == 200
body = resp.text
# Must not crash; must include the fallback basename from the URL.
assert "legacy.pdf" in body
def test_fragment_done_shows_pretty_json(
self,
app: TestClient,
postgres_url: str,
) -> None:
request_id = f"ui-frag-d-{uuid4().hex[:8]}"
with FIXTURE_PDF.open("rb") as fh:
app.post(
"/ui/jobs",
data={
"use_case_mode": "registered",
"use_case_name": "bank_statement_header",
"ix_client_id": "ui-test",
"request_id": request_id,
},
files={
"pdf": (
"my-done-doc.pdf",
fh,
"application/pdf",
)
},
follow_redirects=False,
)
job_row = _find_job(postgres_url, "ui-test", request_id)
assert job_row is not None
# Hand-tick the row to done with a fake response.
_force_done(
postgres_url,
job_row.job_id,
response_body={
"use_case": "bank_statement_header",
"ix_result": {"result": {"bank_name": "UBS AG", "currency": "CHF"}},
},
)
resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
assert resp.status_code == 200
body = resp.text
# Terminal → no auto-refresh.
        assert "every 2s" not in body
# JSON present.
assert "UBS AG" in body
assert "CHF" in body
# Filename surfaced on the done fragment.
assert "my-done-doc.pdf" in body
def _find_job(postgres_url: str, client_id: str, request_id: str): # type: ignore[no-untyped-def]
"""Look up an ``ix_jobs`` row via the async engine, wrapping the coroutine
for test convenience."""
import asyncio
import json as _json
async def _go(): # type: ignore[no-untyped-def]
eng = create_async_engine(postgres_url)
sf = async_sessionmaker(eng, expire_on_commit=False)
try:
async with sf() as session:
r = await session.scalar(
select(IxJob).where(
IxJob.client_id == client_id,
IxJob.request_id == request_id,
)
)
if r is None:
return None
class _JobRow:
pass
out = _JobRow()
out.job_id = r.job_id
out.client_id = r.client_id
out.request_id = r.request_id
out.status = r.status
if isinstance(r.request, str):
out.request = _json.loads(r.request)
else:
out.request = r.request
return out
finally:
await eng.dispose()
return asyncio.run(_go())
def _force_done(
postgres_url: str,
job_id, # type: ignore[no-untyped-def]
response_body: dict,
) -> None:
"""Flip a pending job to ``done`` with the given response payload."""
import asyncio
from datetime import UTC, datetime
from sqlalchemy import text
async def _go(): # type: ignore[no-untyped-def]
eng = create_async_engine(postgres_url)
try:
async with eng.begin() as conn:
await conn.execute(
text(
"UPDATE ix_jobs SET status='done', "
"response=CAST(:resp AS JSONB), finished_at=:now "
"WHERE job_id=:jid"
),
{
"resp": json.dumps(response_body),
"now": datetime.now(UTC),
"jid": str(job_id),
},
)
finally:
await eng.dispose()
asyncio.run(_go())
def _force_running(
postgres_url: str,
job_id, # type: ignore[no-untyped-def]
seconds_ago: int = 10,
) -> None:
"""Flip a pending job to ``running`` with a backdated ``started_at``.
The fragment renders "Running for MM:SS" which needs a ``started_at`` in
the past; 10s is enough to produce a deterministic non-zero MM:SS.
"""
import asyncio
from datetime import UTC, datetime, timedelta
from sqlalchemy import text
async def _go(): # type: ignore[no-untyped-def]
eng = create_async_engine(postgres_url)
try:
async with eng.begin() as conn:
await conn.execute(
text(
"UPDATE ix_jobs SET status='running', started_at=:t "
"WHERE job_id=:jid"
),
{
"t": datetime.now(UTC) - timedelta(seconds=seconds_ago),
"jid": str(job_id),
},
)
finally:
await eng.dispose()
asyncio.run(_go())

View file

@@ -5,7 +5,7 @@ Never runs in CI (Forgejo runner has no LAN access to Ollama). Run locally::
    IX_TEST_OLLAMA=1 uv run pytest tests/live/test_ollama_client_live.py -v
Assumes the Ollama server at ``http://192.168.68.42:11434`` already has
-``qwen3:14b`` pulled.
+``gpt-oss:20b`` pulled.
"""
from __future__ import annotations
@ -26,7 +26,7 @@ pytestmark = [
]
_OLLAMA_URL = "http://192.168.68.42:11434"
-_MODEL = "qwen3:14b"
+_MODEL = "gpt-oss:20b"
async def test_structured_output_round_trip() -> None:


@ -1,83 +0,0 @@
"""Live test for :class:`SuryaOCRClient` — gated on ``IX_TEST_OLLAMA=1``.
Downloads real Surya models (hundreds of MB) on first run. Never runs in
CI. Exercised locally with::
IX_TEST_OLLAMA=1 uv run pytest tests/live/test_surya_client_live.py -v
Note: requires the ``[ocr]`` extra (``uv sync --extra ocr --extra dev``).
"""
from __future__ import annotations
import os
from pathlib import Path
import pytest
from ix.contracts import Page
from ix.segmentation import PageMetadata
pytestmark = [
pytest.mark.live,
pytest.mark.skipif(
os.environ.get("IX_TEST_OLLAMA") != "1",
reason="live: IX_TEST_OLLAMA=1 required",
),
]
async def test_extracts_dkb_and_iban_from_synthetic_giro() -> None:
"""Real Surya run against ``tests/fixtures/synthetic_giro.pdf``.
Assert the flat text contains ``"DKB"`` and the IBAN without spaces.
"""
from ix.ocr.surya_client import SuryaOCRClient
fixture = Path(__file__).parent.parent / "fixtures" / "synthetic_giro.pdf"
assert fixture.exists(), f"missing fixture: {fixture}"
# Build Pages the way DocumentIngestor would for this PDF: count pages
# via PyMuPDF so we pass the right number of inputs.
import fitz
doc = fitz.open(str(fixture))
try:
pages = [
Page(
page_no=i + 1,
width=float(p.rect.width),
height=float(p.rect.height),
lines=[],
)
for i, p in enumerate(doc)
]
finally:
doc.close()
client = SuryaOCRClient()
result = await client.ocr(
pages,
files=[(fixture, "application/pdf")],
page_metadata=[PageMetadata(file_index=0) for _ in pages],
)
flat_text = result.result.text or ""
# Join page-level line texts if flat text missing (shape-safety).
if not flat_text:
flat_text = "\n".join(
line.text or ""
for page in result.result.pages
for line in page.lines
)
assert "DKB" in flat_text
assert "DE89370400440532013000" in flat_text.replace(" ", "")
async def test_selfcheck_ok_against_real_predictors() -> None:
"""``selfcheck()`` returns ``ok`` once Surya's predictors load."""
from ix.ocr.surya_client import SuryaOCRClient
client = SuryaOCRClient()
assert await client.selfcheck() == "ok"


@ -1,104 +0,0 @@
"""Tests for ``ix.app`` lifespan / probe wiring (Task 4.3).
The lifespan selects fake clients when ``IX_TEST_MODE=fake`` and exposes
their probes via the route DI hook. These tests exercise the probe
adapter in isolation: no DB, no real Ollama/Surya.
"""
from __future__ import annotations
from typing import Literal
from ix.app import _make_ocr_probe, _make_ollama_probe, build_pipeline
from ix.config import AppConfig
from ix.genai.fake import FakeGenAIClient
from ix.ocr.fake import FakeOCRClient
from ix.pipeline.genai_step import GenAIStep
from ix.pipeline.ocr_step import OCRStep
from ix.pipeline.pipeline import Pipeline
from ix.pipeline.reliability_step import ReliabilityStep
from ix.pipeline.response_handler_step import ResponseHandlerStep
from ix.pipeline.setup_step import SetupStep
def _cfg(**overrides: object) -> AppConfig:
return AppConfig(_env_file=None, **overrides) # type: ignore[call-arg]
class _SelfcheckOllamaClient:
async def invoke(self, *a: object, **kw: object) -> object:
raise NotImplementedError
async def selfcheck(
self, expected_model: str
) -> Literal["ok", "degraded", "fail"]:
self.called_with = expected_model
return "ok"
class _SelfcheckOCRClient:
async def ocr(self, *a: object, **kw: object) -> object:
raise NotImplementedError
async def selfcheck(self) -> Literal["ok", "fail"]:
return "ok"
class _BrokenSelfcheckOllama:
async def invoke(self, *a: object, **kw: object) -> object:
raise NotImplementedError
async def selfcheck(
self, expected_model: str
) -> Literal["ok", "degraded", "fail"]:
raise RuntimeError("boom")
class TestOllamaProbe:
def test_fake_client_without_selfcheck_reports_ok(self) -> None:
cfg = _cfg(test_mode="fake", default_model="gpt-oss:20b")
probe = _make_ollama_probe(FakeGenAIClient(parsed=None), cfg)
assert probe() == "ok"
def test_real_selfcheck_returns_its_verdict(self) -> None:
cfg = _cfg(default_model="gpt-oss:20b")
client = _SelfcheckOllamaClient()
probe = _make_ollama_probe(client, cfg) # type: ignore[arg-type]
assert probe() == "ok"
assert client.called_with == "gpt-oss:20b"
def test_selfcheck_exception_falls_back_to_fail(self) -> None:
cfg = _cfg(default_model="gpt-oss:20b")
probe = _make_ollama_probe(_BrokenSelfcheckOllama(), cfg) # type: ignore[arg-type]
assert probe() == "fail"
class TestOCRProbe:
def test_fake_client_without_selfcheck_reports_ok(self) -> None:
from ix.contracts.response import OCRDetails, OCRResult
probe = _make_ocr_probe(FakeOCRClient(canned=OCRResult(result=OCRDetails())))
assert probe() == "ok"
def test_real_selfcheck_returns_its_verdict(self) -> None:
probe = _make_ocr_probe(_SelfcheckOCRClient()) # type: ignore[arg-type]
assert probe() == "ok"
class TestBuildPipeline:
def test_assembles_all_five_steps_in_order(self) -> None:
from ix.contracts.response import OCRDetails, OCRResult
genai = FakeGenAIClient(parsed=None)
ocr = FakeOCRClient(canned=OCRResult(result=OCRDetails()))
cfg = _cfg(test_mode="fake")
pipeline = build_pipeline(genai, ocr, cfg)
assert isinstance(pipeline, Pipeline)
steps = pipeline._steps # type: ignore[attr-defined]
assert [type(s) for s in steps] == [
SetupStep,
OCRStep,
GenAIStep,
ReliabilityStep,
ResponseHandlerStep,
]


@ -51,10 +51,10 @@ def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None:
assert cfg.postgres_url == (
"postgresql+asyncpg://infoxtractor:<password>"
-"@127.0.0.1:5431/infoxtractor"
+"@host.docker.internal:5431/infoxtractor"
)
-assert cfg.ollama_url == "http://127.0.0.1:11434"
-assert cfg.default_model == "qwen3:14b"
+assert cfg.ollama_url == "http://host.docker.internal:11434"
+assert cfg.default_model == "gpt-oss:20b"
assert cfg.ocr_engine == "surya"
assert cfg.tmp_dir == "/tmp/ix"
assert cfg.pipeline_worker_concurrency == 1


@ -31,7 +31,6 @@ from ix.contracts import (
ResponseIX,
SegmentCitation,
)
from ix.contracts.request import InlineUseCase, UseCaseFieldDef
class TestFileRef:
@ -50,24 +49,6 @@ class TestFileRef:
assert fr.headers == {"Authorization": "Token abc"}
assert fr.max_bytes == 1_000_000
def test_display_name_defaults_to_none(self) -> None:
fr = FileRef(url="file:///tmp/ix/ui/abc.pdf")
assert fr.display_name is None
def test_display_name_roundtrip(self) -> None:
fr = FileRef(
url="file:///tmp/ix/ui/abc.pdf",
display_name="my statement.pdf",
)
assert fr.display_name == "my statement.pdf"
dumped = fr.model_dump_json()
rt = FileRef.model_validate_json(dumped)
assert rt.display_name == "my statement.pdf"
# Backward-compat: a serialised FileRef without display_name still
# validates cleanly (older stored jobs predate the field).
legacy = FileRef.model_validate({"url": "file:///x.pdf"})
assert legacy.display_name is None
class TestOptionDefaults:
def test_ocr_defaults_match_spec(self) -> None:
@ -201,32 +182,6 @@ class TestRequestIX:
with pytest.raises(ValidationError):
RequestIX.model_validate({"use_case": "x"})
def test_use_case_inline_defaults_to_none(self) -> None:
r = RequestIX(**self._minimal_payload())
assert r.use_case_inline is None
def test_use_case_inline_roundtrip(self) -> None:
payload = self._minimal_payload()
payload["use_case_inline"] = {
"use_case_name": "adhoc",
"system_prompt": "extract stuff",
"fields": [
{"name": "a", "type": "str", "required": True},
{"name": "b", "type": "int"},
],
}
r = RequestIX.model_validate(payload)
assert r.use_case_inline is not None
assert isinstance(r.use_case_inline, InlineUseCase)
assert r.use_case_inline.use_case_name == "adhoc"
assert len(r.use_case_inline.fields) == 2
assert isinstance(r.use_case_inline.fields[0], UseCaseFieldDef)
# Round-trip through JSON
dumped = r.model_dump_json()
r2 = RequestIX.model_validate_json(dumped)
assert r2.use_case_inline is not None
assert r2.use_case_inline.fields[1].type == "int"
class TestOCRResult:
def test_minimal_defaults(self) -> None:


@ -1,60 +0,0 @@
"""Tests for the GenAI + OCR factories (Task 4.3).
The factories pick between fake and real clients based on
``IX_TEST_MODE``. CI runs with ``IX_TEST_MODE=fake``, production runs
without it, so the selection knob is the one lever between hermetic CI and
real clients.
"""
from __future__ import annotations
from ix.config import AppConfig
from ix.genai import make_genai_client
from ix.genai.fake import FakeGenAIClient
from ix.genai.ollama_client import OllamaClient
from ix.ocr import make_ocr_client
from ix.ocr.fake import FakeOCRClient
from ix.ocr.surya_client import SuryaOCRClient
def _cfg(**overrides: object) -> AppConfig:
"""Build an AppConfig without loading the repo's .env.example."""
return AppConfig(_env_file=None, **overrides) # type: ignore[call-arg]
class TestGenAIFactory:
def test_fake_mode_returns_fake(self) -> None:
cfg = _cfg(test_mode="fake")
client = make_genai_client(cfg)
assert isinstance(client, FakeGenAIClient)
def test_production_returns_ollama_with_configured_url(self) -> None:
cfg = _cfg(
test_mode=None,
ollama_url="http://ollama.host:11434",
genai_call_timeout_seconds=42,
)
client = make_genai_client(cfg)
assert isinstance(client, OllamaClient)
# Inspect the private attrs for binding correctness.
assert client._base_url == "http://ollama.host:11434"
assert client._per_call_timeout_s == 42
class TestOCRFactory:
def test_fake_mode_returns_fake(self) -> None:
cfg = _cfg(test_mode="fake")
client = make_ocr_client(cfg)
assert isinstance(client, FakeOCRClient)
def test_production_surya_returns_surya(self) -> None:
cfg = _cfg(test_mode=None, ocr_engine="surya")
client = make_ocr_client(cfg)
assert isinstance(client, SuryaOCRClient)
def test_unknown_engine_raises(self) -> None:
cfg = _cfg(test_mode=None, ocr_engine="tesseract")
import pytest
with pytest.raises(ValueError, match="ocr_engine"):
make_ocr_client(cfg)


@ -363,8 +363,8 @@ class TestModelSelection:
req = _make_request(include_provenance=False)
resp = _response_with_segment_index(lines=["hello"])
await step.process(req, resp)
-# use-case default is qwen3:14b
-assert client.request_kwargs["model"] == "qwen3:14b"  # type: ignore[index]
+# use-case default is gpt-oss:20b
+assert client.request_kwargs["model"] == "gpt-oss:20b"  # type: ignore[index]
# ----------------------------------------------------------------------------


@ -79,19 +79,10 @@ class TestInvokeHappyPath:
body_json = json.loads(body)
assert body_json["model"] == "gpt-oss:20b"
assert body_json["stream"] is False
-# No `format` is sent: Ollama 0.11.8 segfaults on full schemas and
-# aborts to `{}` with `format=json` on reasoning models. Schema is
-# injected into the system prompt instead; we extract the trailing
-# JSON blob from the response and validate via Pydantic.
-assert "format" not in body_json
+assert body_json["format"] == _Schema.model_json_schema()
assert body_json["options"]["temperature"] == 0.2
assert "reasoning_effort" not in body_json
-# A schema-guidance system message is prepended to the caller's
-# messages so Ollama (format=json loose mode) emits the right shape.
-msgs = body_json["messages"]
-assert msgs[0]["role"] == "system"
-assert "JSON Schema" in msgs[0]["content"]
-assert msgs[1:] == [
+assert body_json["messages"] == [
{"role": "system", "content": "You extract."},
{"role": "user", "content": "Doc body"},
]
@ -125,10 +116,7 @@ class TestInvokeHappyPath:
import json
request_body = json.loads(httpx_mock.get_requests()[0].read())
-# First message is the auto-injected schema guidance; after that
-# the caller's user message has its text parts joined.
-assert request_body["messages"][0]["role"] == "system"
-assert request_body["messages"][1:] == [
+assert request_body["messages"] == [
{"role": "user", "content": "part-a\npart-b"}
]


@ -15,7 +15,6 @@ from ix.contracts import (
RequestIX,
ResponseIX,
)
from ix.contracts.request import InlineUseCase, UseCaseFieldDef
from ix.contracts.response import _InternalContext
from ix.errors import IXErrorCode, IXException
from ix.ingestion import FetchConfig
@ -245,102 +244,6 @@ class TestTextOnly:
assert ctx.texts == ["hello", "there"]
class TestInlineUseCase:
def _make_inline_request(
self,
inline: InlineUseCase,
use_case: str = "adhoc-label",
texts: list[str] | None = None,
) -> RequestIX:
return RequestIX(
use_case=use_case,
use_case_inline=inline,
ix_client_id="test",
request_id="r-inline",
context=Context(files=[], texts=texts or ["hello"]),
options=Options(
ocr=OCROptions(use_ocr=True),
provenance=ProvenanceOptions(include_provenance=True),
),
)
async def test_inline_use_case_overrides_registry(self, tmp_path: Path) -> None:
fetcher = FakeFetcher({})
ingestor = FakeIngestor([])
step = SetupStep(
fetcher=fetcher,
ingestor=ingestor,
tmp_dir=tmp_path / "work",
fetch_config=_make_cfg(),
mime_detector=_AlwaysMimePdf(),
)
inline = InlineUseCase(
use_case_name="adhoc",
system_prompt="Extract things.",
fields=[
UseCaseFieldDef(name="vendor", type="str", required=True),
UseCaseFieldDef(name="amount", type="decimal"),
],
)
req = self._make_inline_request(inline)
resp = _make_response()
resp = await step.process(req, resp)
ctx = resp.context
assert ctx is not None
# The response class must have been built from our field list.
resp_cls = ctx.use_case_response # type: ignore[union-attr]
assert set(resp_cls.model_fields.keys()) == {"vendor", "amount"}
# Public display name reflects the inline label.
assert resp.use_case_name == "adhoc"
async def test_inline_precedence_when_both_set(self, tmp_path: Path) -> None:
# ``use_case`` is a valid registered name; ``use_case_inline`` is also
# present. Inline MUST win (documented precedence).
fetcher = FakeFetcher({})
ingestor = FakeIngestor([])
step = SetupStep(
fetcher=fetcher,
ingestor=ingestor,
tmp_dir=tmp_path / "work",
fetch_config=_make_cfg(),
mime_detector=_AlwaysMimePdf(),
)
inline = InlineUseCase(
use_case_name="override",
system_prompt="override prompt",
fields=[UseCaseFieldDef(name="just_me", type="str", required=True)],
)
req = self._make_inline_request(
inline, use_case="bank_statement_header"
)
resp = await step.process(req, _make_response())
resp_cls = resp.context.use_case_response # type: ignore[union-attr]
assert set(resp_cls.model_fields.keys()) == {"just_me"}
async def test_inline_with_bad_field_raises_ix_001_001(
self, tmp_path: Path
) -> None:
fetcher = FakeFetcher({})
ingestor = FakeIngestor([])
step = SetupStep(
fetcher=fetcher,
ingestor=ingestor,
tmp_dir=tmp_path / "work",
fetch_config=_make_cfg(),
mime_detector=_AlwaysMimePdf(),
)
inline = InlineUseCase(
use_case_name="bad",
system_prompt="p",
fields=[UseCaseFieldDef(name="123bad", type="str")],
)
req = self._make_inline_request(inline)
with pytest.raises(IXException) as ei:
await step.process(req, _make_response())
assert ei.value.code is IXErrorCode.IX_001_001
class TestInternalContextShape:
async def test_context_is_internal_context_instance(self, tmp_path: Path) -> None:
fetcher = FakeFetcher({})


@ -1,238 +0,0 @@
"""Tests for :class:`SuryaOCRClient` — hermetic, no model download.
The real Surya predictors are patched out with :class:`unittest.mock.MagicMock`
instances that return trivially-shaped line objects. The tests assert the
client's translation layer: flattening polygons, mapping Surya ``text_lines``
to ``Line`` objects, and preserving ``page_no``/``width``/``height`` per
input page.
"""
from __future__ import annotations
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
from ix.contracts import Page
from ix.ocr.surya_client import SuryaOCRClient
from ix.segmentation import PageMetadata
def _make_surya_line(text: str, polygon: list[list[float]]) -> SimpleNamespace:
"""Mimic ``surya.recognition.schema.TextLine`` duck-typing-style."""
return SimpleNamespace(text=text, polygon=polygon, confidence=0.95)
def _make_surya_ocr_result(lines: list[SimpleNamespace]) -> SimpleNamespace:
"""Mimic ``surya.recognition.schema.OCRResult``."""
return SimpleNamespace(text_lines=lines, image_bbox=[0, 0, 100, 100])
class TestOCRBuildsOCRResultFromMockedPredictors:
async def test_one_image_one_line_flatten_polygon(self, tmp_path: Path) -> None:
img_path = tmp_path / "a.png"
_write_tiny_png(img_path)
mock_line = _make_surya_line(
text="hello",
polygon=[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]],
)
mock_predictor = MagicMock(
return_value=[_make_surya_ocr_result([mock_line])]
)
client = SuryaOCRClient()
# Skip the real warm_up; inject the mock directly.
client._recognition_predictor = mock_predictor
client._detection_predictor = MagicMock()
pages = [Page(page_no=1, width=100.0, height=50.0, lines=[])]
result = await client.ocr(
pages,
files=[(img_path, "image/png")],
page_metadata=[PageMetadata(file_index=0)],
)
assert len(result.result.pages) == 1
out_page = result.result.pages[0]
assert out_page.page_no == 1
assert out_page.width == 100.0
assert out_page.height == 50.0
assert len(out_page.lines) == 1
assert out_page.lines[0].text == "hello"
assert out_page.lines[0].bounding_box == [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
]
async def test_multiple_pages_preserves_order(self, tmp_path: Path) -> None:
img_a = tmp_path / "a.png"
img_b = tmp_path / "b.png"
_write_tiny_png(img_a)
_write_tiny_png(img_b)
mock_predictor = MagicMock(
return_value=[
_make_surya_ocr_result(
[_make_surya_line("a-line", [[0, 0], [1, 0], [1, 1], [0, 1]])]
),
_make_surya_ocr_result(
[_make_surya_line("b-line", [[0, 0], [1, 0], [1, 1], [0, 1]])]
),
]
)
client = SuryaOCRClient()
client._recognition_predictor = mock_predictor
client._detection_predictor = MagicMock()
pages = [
Page(page_no=1, width=10.0, height=20.0, lines=[]),
Page(page_no=2, width=10.0, height=20.0, lines=[]),
]
result = await client.ocr(
pages,
files=[(img_a, "image/png"), (img_b, "image/png")],
page_metadata=[
PageMetadata(file_index=0),
PageMetadata(file_index=1),
],
)
assert [p.lines[0].text for p in result.result.pages] == ["a-line", "b-line"]
async def test_lazy_warm_up_on_first_ocr(self, tmp_path: Path) -> None:
img = tmp_path / "x.png"
_write_tiny_png(img)
client = SuryaOCRClient()
# Use patch.object on the instance's warm_up so we don't need real
# Surya module loading.
with patch.object(client, "warm_up", autospec=True) as mocked_warm_up:
# After warm_up is called, the predictors must be assigned.
def fake_warm_up(self: SuryaOCRClient) -> None:
self._recognition_predictor = MagicMock(
return_value=[
_make_surya_ocr_result(
[
_make_surya_line(
"hi", [[0, 0], [1, 0], [1, 1], [0, 1]]
)
]
)
]
)
self._detection_predictor = MagicMock()
mocked_warm_up.side_effect = lambda: fake_warm_up(client)
pages = [Page(page_no=1, width=10.0, height=10.0, lines=[])]
await client.ocr(
pages,
files=[(img, "image/png")],
page_metadata=[PageMetadata(file_index=0)],
)
mocked_warm_up.assert_called_once()
class TestSelfcheck:
async def test_selfcheck_ok_with_mocked_predictors(self) -> None:
client = SuryaOCRClient()
client._recognition_predictor = MagicMock(
return_value=[_make_surya_ocr_result([])]
)
client._detection_predictor = MagicMock()
assert await client.selfcheck() == "ok"
async def test_selfcheck_fail_when_predictor_raises(self) -> None:
client = SuryaOCRClient()
client._recognition_predictor = MagicMock(
side_effect=RuntimeError("cuda broken")
)
client._detection_predictor = MagicMock()
assert await client.selfcheck() == "fail"
def _write_tiny_png(path: Path) -> None:
"""Write a 2x2 white PNG so PIL can open it."""
from PIL import Image
Image.new("RGB", (2, 2), color="white").save(path, format="PNG")
class TestGpuAvailableFlag:
def test_default_is_none(self) -> None:
client = SuryaOCRClient()
assert client.gpu_available is None
def test_warm_up_probes_cuda_true(self) -> None:
"""When torch reports CUDA, warm_up records True on the instance."""
client = SuryaOCRClient()
fake_foundation = MagicMock()
fake_recognition = MagicMock()
fake_detection = MagicMock()
fake_torch = SimpleNamespace(
cuda=SimpleNamespace(is_available=lambda: True)
)
module_patches = {
"surya.detection": SimpleNamespace(
DetectionPredictor=lambda: fake_detection
),
"surya.foundation": SimpleNamespace(
FoundationPredictor=lambda: fake_foundation
),
"surya.recognition": SimpleNamespace(
RecognitionPredictor=lambda _f: fake_recognition
),
"torch": fake_torch,
}
with patch.dict("sys.modules", module_patches):
client.warm_up()
assert client.gpu_available is True
assert client._recognition_predictor is fake_recognition
assert client._detection_predictor is fake_detection
def test_warm_up_probes_cuda_false(self) -> None:
"""CPU-mode host → warm_up records False."""
client = SuryaOCRClient()
fake_torch = SimpleNamespace(
cuda=SimpleNamespace(is_available=lambda: False)
)
module_patches = {
"surya.detection": SimpleNamespace(
DetectionPredictor=lambda: MagicMock()
),
"surya.foundation": SimpleNamespace(
FoundationPredictor=lambda: MagicMock()
),
"surya.recognition": SimpleNamespace(
RecognitionPredictor=lambda _f: MagicMock()
),
"torch": fake_torch,
}
with patch.dict("sys.modules", module_patches):
client.warm_up()
assert client.gpu_available is False
def test_warm_up_is_idempotent_for_probe(self) -> None:
"""Second warm_up short-circuits; probed flag is preserved."""
client = SuryaOCRClient()
client._recognition_predictor = MagicMock()
client._detection_predictor = MagicMock()
client.gpu_available = True
# No module patches — warm_up must NOT touch sys.modules or torch.
client.warm_up()
assert client.gpu_available is True
@pytest.mark.parametrize("unused", [None]) # keep pytest happy if file ever runs alone
def test_module_imports(unused: None) -> None:
assert SuryaOCRClient is not None


@ -12,7 +12,7 @@ class TestRequest:
def test_defaults(self) -> None:
r = Request()
assert r.use_case_name == "Bank Statement Header"
-assert r.default_model == "qwen3:14b"
+assert r.default_model == "gpt-oss:20b"
# Stable substring for agent/worker tests that want to confirm the
# prompt is what they think it is.
assert "extract header metadata" in r.system_prompt

View file

@ -1,313 +0,0 @@
"""Tests for :mod:`ix.use_cases.inline` — dynamic Pydantic class builder.
The builder takes an :class:`InlineUseCase` (carried on :class:`RequestIX` as
``use_case_inline``) and produces a fresh ``(RequestClass, ResponseClass)``
pair that the pipeline can consume in place of a registered use case.
"""
from __future__ import annotations
import json
from datetime import date, datetime
from decimal import Decimal
import pytest
from pydantic import BaseModel, ValidationError
from ix.contracts.request import InlineUseCase, UseCaseFieldDef
from ix.errors import IXErrorCode, IXException
from ix.use_cases.inline import build_use_case_classes
class TestUseCaseFieldDef:
def test_minimal(self) -> None:
fd = UseCaseFieldDef(name="foo", type="str")
assert fd.name == "foo"
assert fd.type == "str"
assert fd.required is False
assert fd.description is None
assert fd.choices is None
def test_extra_forbidden(self) -> None:
with pytest.raises(ValidationError):
UseCaseFieldDef.model_validate(
{"name": "foo", "type": "str", "bogus": 1}
)
def test_invalid_type_rejected(self) -> None:
with pytest.raises(ValidationError):
UseCaseFieldDef.model_validate({"name": "foo", "type": "list"})
class TestInlineUseCaseRoundtrip:
def test_json_roundtrip(self) -> None:
iuc = InlineUseCase(
use_case_name="Vendor Total",
system_prompt="Extract invoice total and vendor.",
default_model="qwen3:14b",
fields=[
UseCaseFieldDef(name="vendor", type="str", required=True),
UseCaseFieldDef(
name="total",
type="decimal",
required=True,
description="total amount due",
),
UseCaseFieldDef(
name="currency",
type="str",
choices=["USD", "EUR", "CHF"],
),
],
)
dumped = iuc.model_dump_json()
round = InlineUseCase.model_validate_json(dumped)
assert round == iuc
# JSON is well-formed
json.loads(dumped)
def test_extra_forbidden(self) -> None:
with pytest.raises(ValidationError):
InlineUseCase.model_validate(
{
"use_case_name": "X",
"system_prompt": "p",
"fields": [],
"bogus": 1,
}
)
class TestBuildBasicTypes:
@pytest.mark.parametrize(
"type_name, sample_value, bad_value",
[
("str", "hello", 123),
("int", 42, "nope"),
("float", 3.14, "nope"),
("bool", True, "nope"),
],
)
def test_simple_type(
self, type_name: str, sample_value: object, bad_value: object
) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[UseCaseFieldDef(name="field", type=type_name, required=True)],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
instance = resp_cls(field=sample_value)
assert instance.field == sample_value
with pytest.raises(ValidationError):
resp_cls(field=bad_value)
def test_decimal_type(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[UseCaseFieldDef(name="amount", type="decimal", required=True)],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
instance = resp_cls(amount="12.34")
assert isinstance(instance.amount, Decimal)
assert instance.amount == Decimal("12.34")
def test_date_type(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[UseCaseFieldDef(name="d", type="date", required=True)],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
instance = resp_cls(d="2026-04-18")
assert instance.d == date(2026, 4, 18)
def test_datetime_type(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[UseCaseFieldDef(name="ts", type="datetime", required=True)],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
instance = resp_cls(ts="2026-04-18T10:00:00")
assert isinstance(instance.ts, datetime)
class TestOptionalVsRequired:
def test_required_field_cannot_be_missing(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[UseCaseFieldDef(name="must", type="str", required=True)],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
with pytest.raises(ValidationError):
resp_cls()
def test_optional_field_defaults_to_none(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[UseCaseFieldDef(name="maybe", type="str", required=False)],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
instance = resp_cls()
assert instance.maybe is None
def test_optional_field_schema_allows_null(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[UseCaseFieldDef(name="maybe", type="str", required=False)],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
schema = resp_cls.model_json_schema()
# "maybe" accepts string or null
prop = schema["properties"]["maybe"]
# Pydantic may express Optional as anyOf [str, null] or a type list.
# Either is fine — just assert null is allowed somewhere.
dumped = json.dumps(prop)
assert "null" in dumped
class TestChoices:
def test_choices_for_str_produces_literal(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[
UseCaseFieldDef(
name="kind",
type="str",
required=True,
choices=["a", "b", "c"],
)
],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
inst = resp_cls(kind="a")
assert inst.kind == "a"
with pytest.raises(ValidationError):
resp_cls(kind="nope")
schema = resp_cls.model_json_schema()
# enum or const wind up in a referenced definition; walk the schema
dumped = json.dumps(schema)
assert '"a"' in dumped and '"b"' in dumped and '"c"' in dumped
def test_choices_for_non_str_raises_ix_001_001(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[
UseCaseFieldDef(
name="kind",
type="int",
required=True,
choices=["1", "2"],
)
],
)
with pytest.raises(IXException) as exc:
build_use_case_classes(iuc)
assert exc.value.code is IXErrorCode.IX_001_001
def test_empty_choices_list_ignored(self) -> None:
# An explicitly empty list is treated as if choices were unset; the
# builder must not break. If the caller sent choices=[] we treat the
# field as plain str.
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[
UseCaseFieldDef(
name="kind", type="str", required=True, choices=[]
)
],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
inst = resp_cls(kind="anything")
assert inst.kind == "anything"
class TestValidation:
def test_duplicate_field_names_raise(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[
UseCaseFieldDef(name="foo", type="str"),
UseCaseFieldDef(name="foo", type="int"),
],
)
with pytest.raises(IXException) as exc:
build_use_case_classes(iuc)
assert exc.value.code is IXErrorCode.IX_001_001
def test_invalid_field_name_raises(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[UseCaseFieldDef(name="123abc", type="str")],
)
with pytest.raises(IXException) as exc:
build_use_case_classes(iuc)
assert exc.value.code is IXErrorCode.IX_001_001
def test_empty_fields_list_raises(self) -> None:
iuc = InlineUseCase(
use_case_name="X", system_prompt="p", fields=[]
)
with pytest.raises(IXException) as exc:
build_use_case_classes(iuc)
assert exc.value.code is IXErrorCode.IX_001_001
class TestResponseClassNaming:
def test_class_name_sanitised(self) -> None:
iuc = InlineUseCase(
use_case_name="Bank / Statement — header!",
system_prompt="p",
fields=[UseCaseFieldDef(name="x", type="str")],
)
_req_cls, resp_cls = build_use_case_classes(iuc)
assert resp_cls.__name__.startswith("Inline_")
# Only alphanumerics and underscores remain.
assert all(c.isalnum() or c == "_" for c in resp_cls.__name__)
def test_fresh_instances_per_call(self) -> None:
iuc = InlineUseCase(
use_case_name="X",
system_prompt="p",
fields=[UseCaseFieldDef(name="x", type="str")],
)
req1, resp1 = build_use_case_classes(iuc)
req2, resp2 = build_use_case_classes(iuc)
assert resp1 is not resp2
assert req1 is not req2
class TestRequestClassShape:
    def test_request_class_exposes_prompt_and_default(self) -> None:
        iuc = InlineUseCase(
            use_case_name="My Case",
            system_prompt="Follow directions.",
            default_model="qwen3:14b",
            fields=[UseCaseFieldDef(name="x", type="str")],
        )
        req_cls, _resp_cls = build_use_case_classes(iuc)
        inst = req_cls()
        assert inst.use_case_name == "My Case"
        assert inst.system_prompt == "Follow directions."
        assert inst.default_model == "qwen3:14b"
        assert issubclass(req_cls, BaseModel)

    def test_default_model_none_when_unset(self) -> None:
        iuc = InlineUseCase(
            use_case_name="My Case",
            system_prompt="Follow directions.",
            fields=[UseCaseFieldDef(name="x", type="str")],
        )
        req_cls, _resp_cls = build_use_case_classes(iuc)
        inst = req_cls()
        assert inst.default_model is None
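The validation and name-sanitisation behaviour these tests pin down can be sketched with stdlib-only helpers. This is a hypothetical illustration of the rules, not the actual `build_use_case_classes` implementation; `sanitise_class_name` and `validate_field_names` are invented names here, and the real code raises `IXException` with `IXErrorCode.IX_001_001` rather than `ValueError`:

```python
import keyword
import re


def sanitise_class_name(use_case_name: str) -> str:
    """Turn an arbitrary use-case name into an Inline_-prefixed identifier."""
    # Replace every character that is not a word character with an underscore,
    # so only alphanumerics and underscores remain.
    return "Inline_" + re.sub(r"\W", "_", use_case_name)


def validate_field_names(names: list[str]) -> None:
    """Reject empty field lists, non-identifier names, and duplicates."""
    # In the real module each of these failures maps to IX_001_001.
    if not names:
        raise ValueError("at least one field is required")
    seen: set[str] = set()
    for name in names:
        if not name.isidentifier() or keyword.iskeyword(name):
            raise ValueError(f"invalid field name: {name!r}")
        if name in seen:
            raise ValueError(f"duplicate field name: {name!r}")
        seen.add(name)
```

Building the fresh Pydantic classes on top of checks like these (e.g. via `pydantic.create_model` per call) would also satisfy the `test_fresh_instances_per_call` identity assertions.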

uv.lock
View file

@@ -7,15 +7,6 @@ resolution-markers = [
    "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
]
[[package]]
name = "aiofiles"
version = "25.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" },
]
[[package]]
name = "alembic"
version = "1.18.4"

@@ -119,79 +110,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" },
]
[[package]]
name = "charset-normalizer"
version = "3.4.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271, upload-time = "2026-04-02T09:28:39.342Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/0c/eb/4fc8d0a7110eb5fc9cc161723a34a8a6c200ce3b4fbf681bc86feee22308/charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46", size = 311328, upload-time = "2026-04-02T09:26:24.331Z" },
{ url = "https://files.pythonhosted.org/packages/f8/e3/0fadc706008ac9d7b9b5be6dc767c05f9d3e5df51744ce4cc9605de7b9f4/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2", size = 208061, upload-time = "2026-04-02T09:26:25.568Z" },
{ url = "https://files.pythonhosted.org/packages/42/f0/3dd1045c47f4a4604df85ec18ad093912ae1344ac706993aff91d38773a2/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b", size = 229031, upload-time = "2026-04-02T09:26:26.865Z" },
{ url = "https://files.pythonhosted.org/packages/dc/67/675a46eb016118a2fbde5a277a5d15f4f69d5f3f5f338e5ee2f8948fcf43/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a", size = 225239, upload-time = "2026-04-02T09:26:28.044Z" },
{ url = "https://files.pythonhosted.org/packages/4b/f8/d0118a2f5f23b02cd166fa385c60f9b0d4f9194f574e2b31cef350ad7223/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116", size = 216589, upload-time = "2026-04-02T09:26:29.239Z" },
{ url = "https://files.pythonhosted.org/packages/b1/f1/6d2b0b261b6c4ceef0fcb0d17a01cc5bc53586c2d4796fa04b5c540bc13d/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb", size = 202733, upload-time = "2026-04-02T09:26:30.5Z" },
{ url = "https://files.pythonhosted.org/packages/6f/c0/7b1f943f7e87cc3db9626ba17807d042c38645f0a1d4415c7a14afb5591f/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1", size = 212652, upload-time = "2026-04-02T09:26:31.709Z" },
{ url = "https://files.pythonhosted.org/packages/38/dd/5a9ab159fe45c6e72079398f277b7d2b523e7f716acc489726115a910097/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15", size = 211229, upload-time = "2026-04-02T09:26:33.282Z" },
{ url = "https://files.pythonhosted.org/packages/d5/ff/531a1cad5ca855d1c1a8b69cb71abfd6d85c0291580146fda7c82857caa1/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5", size = 203552, upload-time = "2026-04-02T09:26:34.845Z" },
{ url = "https://files.pythonhosted.org/packages/c1/4c/a5fb52d528a8ca41f7598cb619409ece30a169fbdf9cdce592e53b46c3a6/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d", size = 230806, upload-time = "2026-04-02T09:26:36.152Z" },
{ url = "https://files.pythonhosted.org/packages/59/7a/071feed8124111a32b316b33ae4de83d36923039ef8cf48120266844285b/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7", size = 212316, upload-time = "2026-04-02T09:26:37.672Z" },
{ url = "https://files.pythonhosted.org/packages/fd/35/f7dba3994312d7ba508e041eaac39a36b120f32d4c8662b8814dab876431/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464", size = 227274, upload-time = "2026-04-02T09:26:38.93Z" },
{ url = "https://files.pythonhosted.org/packages/8a/2d/a572df5c9204ab7688ec1edc895a73ebded3b023bb07364710b05dd1c9be/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49", size = 218468, upload-time = "2026-04-02T09:26:40.17Z" },
{ url = "https://files.pythonhosted.org/packages/86/eb/890922a8b03a568ca2f336c36585a4713c55d4d67bf0f0c78924be6315ca/charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c", size = 148460, upload-time = "2026-04-02T09:26:41.416Z" },
{ url = "https://files.pythonhosted.org/packages/35/d9/0e7dffa06c5ab081f75b1b786f0aefc88365825dfcd0ac544bdb7b2b6853/charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6", size = 159330, upload-time = "2026-04-02T09:26:42.554Z" },
{ url = "https://files.pythonhosted.org/packages/9e/5d/481bcc2a7c88ea6b0878c299547843b2521ccbc40980cb406267088bc701/charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d", size = 147828, upload-time = "2026-04-02T09:26:44.075Z" },
{ url = "https://files.pythonhosted.org/packages/c1/3b/66777e39d3ae1ddc77ee606be4ec6d8cbd4c801f65e5a1b6f2b11b8346dd/charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", size = 309627, upload-time = "2026-04-02T09:26:45.198Z" },
{ url = "https://files.pythonhosted.org/packages/2e/4e/b7f84e617b4854ade48a1b7915c8ccfadeba444d2a18c291f696e37f0d3b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", size = 207008, upload-time = "2026-04-02T09:26:46.824Z" },
{ url = "https://files.pythonhosted.org/packages/c4/bb/ec73c0257c9e11b268f018f068f5d00aa0ef8c8b09f7753ebd5f2880e248/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", size = 228303, upload-time = "2026-04-02T09:26:48.397Z" },
{ url = "https://files.pythonhosted.org/packages/85/fb/32d1f5033484494619f701e719429c69b766bfc4dbc61aa9e9c8c166528b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", size = 224282, upload-time = "2026-04-02T09:26:49.684Z" },
{ url = "https://files.pythonhosted.org/packages/fa/07/330e3a0dda4c404d6da83b327270906e9654a24f6c546dc886a0eb0ffb23/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", size = 215595, upload-time = "2026-04-02T09:26:50.915Z" },
{ url = "https://files.pythonhosted.org/packages/e3/7c/fc890655786e423f02556e0216d4b8c6bcb6bdfa890160dc66bf52dee468/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", size = 201986, upload-time = "2026-04-02T09:26:52.197Z" },
{ url = "https://files.pythonhosted.org/packages/d8/97/bfb18b3db2aed3b90cf54dc292ad79fdd5ad65c4eae454099475cbeadd0d/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", size = 211711, upload-time = "2026-04-02T09:26:53.49Z" },
{ url = "https://files.pythonhosted.org/packages/6f/a5/a581c13798546a7fd557c82614a5c65a13df2157e9ad6373166d2a3e645d/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", size = 210036, upload-time = "2026-04-02T09:26:54.975Z" },
{ url = "https://files.pythonhosted.org/packages/8c/bf/b3ab5bcb478e4193d517644b0fb2bf5497fbceeaa7a1bc0f4d5b50953861/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", size = 202998, upload-time = "2026-04-02T09:26:56.303Z" },
{ url = "https://files.pythonhosted.org/packages/e7/4e/23efd79b65d314fa320ec6017b4b5834d5c12a58ba4610aa353af2e2f577/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", size = 230056, upload-time = "2026-04-02T09:26:57.554Z" },
{ url = "https://files.pythonhosted.org/packages/b9/9f/1e1941bc3f0e01df116e68dc37a55c4d249df5e6fa77f008841aef68264f/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", size = 211537, upload-time = "2026-04-02T09:26:58.843Z" },
{ url = "https://files.pythonhosted.org/packages/80/0f/088cbb3020d44428964a6c97fe1edfb1b9550396bf6d278330281e8b709c/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", size = 226176, upload-time = "2026-04-02T09:27:00.437Z" },
{ url = "https://files.pythonhosted.org/packages/6a/9f/130394f9bbe06f4f63e22641d32fc9b202b7e251c9aef4db044324dac493/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", size = 217723, upload-time = "2026-04-02T09:27:02.021Z" },
{ url = "https://files.pythonhosted.org/packages/73/55/c469897448a06e49f8fa03f6caae97074fde823f432a98f979cc42b90e69/charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", size = 148085, upload-time = "2026-04-02T09:27:03.192Z" },
{ url = "https://files.pythonhosted.org/packages/5d/78/1b74c5bbb3f99b77a1715c91b3e0b5bdb6fe302d95ace4f5b1bec37b0167/charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", size = 158819, upload-time = "2026-04-02T09:27:04.454Z" },
{ url = "https://files.pythonhosted.org/packages/68/86/46bd42279d323deb8687c4a5a811fd548cb7d1de10cf6535d099877a9a9f/charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", size = 147915, upload-time = "2026-04-02T09:27:05.971Z" },
{ url = "https://files.pythonhosted.org/packages/97/c8/c67cb8c70e19ef1960b97b22ed2a1567711de46c4ddf19799923adc836c2/charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", size = 309234, upload-time = "2026-04-02T09:27:07.194Z" },
{ url = "https://files.pythonhosted.org/packages/99/85/c091fdee33f20de70d6c8b522743b6f831a2f1cd3ff86de4c6a827c48a76/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", size = 208042, upload-time = "2026-04-02T09:27:08.749Z" },
{ url = "https://files.pythonhosted.org/packages/87/1c/ab2ce611b984d2fd5d86a5a8a19c1ae26acac6bad967da4967562c75114d/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", size = 228706, upload-time = "2026-04-02T09:27:09.951Z" },
{ url = "https://files.pythonhosted.org/packages/a8/29/2b1d2cb00bf085f59d29eb773ce58ec2d325430f8c216804a0a5cd83cbca/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", size = 224727, upload-time = "2026-04-02T09:27:11.175Z" },
{ url = "https://files.pythonhosted.org/packages/47/5c/032c2d5a07fe4d4855fea851209cca2b6f03ebeb6d4e3afdb3358386a684/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", size = 215882, upload-time = "2026-04-02T09:27:12.446Z" },
{ url = "https://files.pythonhosted.org/packages/2c/c2/356065d5a8b78ed04499cae5f339f091946a6a74f91e03476c33f0ab7100/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", size = 200860, upload-time = "2026-04-02T09:27:13.721Z" },
{ url = "https://files.pythonhosted.org/packages/0c/cd/a32a84217ced5039f53b29f460962abb2d4420def55afabe45b1c3c7483d/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", size = 211564, upload-time = "2026-04-02T09:27:15.272Z" },
{ url = "https://files.pythonhosted.org/packages/44/86/58e6f13ce26cc3b8f4a36b94a0f22ae2f00a72534520f4ae6857c4b81f89/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", size = 211276, upload-time = "2026-04-02T09:27:16.834Z" },
{ url = "https://files.pythonhosted.org/packages/8f/fe/d17c32dc72e17e155e06883efa84514ca375f8a528ba2546bee73fc4df81/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", size = 201238, upload-time = "2026-04-02T09:27:18.229Z" },
{ url = "https://files.pythonhosted.org/packages/6a/29/f33daa50b06525a237451cdb6c69da366c381a3dadcd833fa5676bc468b3/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", size = 230189, upload-time = "2026-04-02T09:27:19.445Z" },
{ url = "https://files.pythonhosted.org/packages/b6/6e/52c84015394a6a0bdcd435210a7e944c5f94ea1055f5cc5d56c5fe368e7b/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", size = 211352, upload-time = "2026-04-02T09:27:20.79Z" },
{ url = "https://files.pythonhosted.org/packages/8c/d7/4353be581b373033fb9198bf1da3cf8f09c1082561e8e922aa7b39bf9fe8/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", size = 227024, upload-time = "2026-04-02T09:27:22.063Z" },
{ url = "https://files.pythonhosted.org/packages/30/45/99d18aa925bd1740098ccd3060e238e21115fffbfdcb8f3ece837d0ace6c/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", size = 217869, upload-time = "2026-04-02T09:27:23.486Z" },
{ url = "https://files.pythonhosted.org/packages/5c/05/5ee478aa53f4bb7996482153d4bfe1b89e0f087f0ab6b294fcf92d595873/charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", size = 148541, upload-time = "2026-04-02T09:27:25.146Z" },
{ url = "https://files.pythonhosted.org/packages/48/77/72dcb0921b2ce86420b2d79d454c7022bf5be40202a2a07906b9f2a35c97/charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", size = 159634, upload-time = "2026-04-02T09:27:26.642Z" },
{ url = "https://files.pythonhosted.org/packages/c6/a3/c2369911cd72f02386e4e340770f6e158c7980267da16af8f668217abaa0/charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", size = 148384, upload-time = "2026-04-02T09:27:28.271Z" },
{ url = "https://files.pythonhosted.org/packages/94/09/7e8a7f73d24dba1f0035fbbf014d2c36828fc1bf9c88f84093e57d315935/charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", size = 330133, upload-time = "2026-04-02T09:27:29.474Z" },
{ url = "https://files.pythonhosted.org/packages/8d/da/96975ddb11f8e977f706f45cddd8540fd8242f71ecdb5d18a80723dcf62c/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", size = 216257, upload-time = "2026-04-02T09:27:30.793Z" },
{ url = "https://files.pythonhosted.org/packages/e5/e8/1d63bf8ef2d388e95c64b2098f45f84758f6d102a087552da1485912637b/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", size = 234851, upload-time = "2026-04-02T09:27:32.44Z" },
{ url = "https://files.pythonhosted.org/packages/9b/40/e5ff04233e70da2681fa43969ad6f66ca5611d7e669be0246c4c7aaf6dc8/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", size = 233393, upload-time = "2026-04-02T09:27:34.03Z" },
{ url = "https://files.pythonhosted.org/packages/be/c1/06c6c49d5a5450f76899992f1ee40b41d076aee9279b49cf9974d2f313d5/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", size = 223251, upload-time = "2026-04-02T09:27:35.369Z" },
{ url = "https://files.pythonhosted.org/packages/2b/9f/f2ff16fb050946169e3e1f82134d107e5d4ae72647ec8a1b1446c148480f/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", size = 206609, upload-time = "2026-04-02T09:27:36.661Z" },
{ url = "https://files.pythonhosted.org/packages/69/d5/a527c0cd8d64d2eab7459784fb4169a0ac76e5a6fc5237337982fd61347e/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", size = 220014, upload-time = "2026-04-02T09:27:38.019Z" },
{ url = "https://files.pythonhosted.org/packages/7e/80/8a7b8104a3e203074dc9aa2c613d4b726c0e136bad1cc734594b02867972/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", size = 218979, upload-time = "2026-04-02T09:27:39.37Z" },
{ url = "https://files.pythonhosted.org/packages/02/9a/b759b503d507f375b2b5c153e4d2ee0a75aa215b7f2489cf314f4541f2c0/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", size = 209238, upload-time = "2026-04-02T09:27:40.722Z" },
{ url = "https://files.pythonhosted.org/packages/c2/4e/0f3f5d47b86bdb79256e7290b26ac847a2832d9a4033f7eb2cd4bcf4bb5b/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", size = 236110, upload-time = "2026-04-02T09:27:42.33Z" },
{ url = "https://files.pythonhosted.org/packages/96/23/bce28734eb3ed2c91dcf93abeb8a5cf393a7b2749725030bb630e554fdd8/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", size = 219824, upload-time = "2026-04-02T09:27:43.924Z" },
{ url = "https://files.pythonhosted.org/packages/2c/6f/6e897c6984cc4d41af319b077f2f600fc8214eb2fe2d6bcb79141b882400/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", size = 233103, upload-time = "2026-04-02T09:27:45.348Z" },
{ url = "https://files.pythonhosted.org/packages/76/22/ef7bd0fe480a0ae9b656189ec00744b60933f68b4f42a7bb06589f6f576a/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", size = 225194, upload-time = "2026-04-02T09:27:46.706Z" },
{ url = "https://files.pythonhosted.org/packages/c5/a7/0e0ab3e0b5bc1219bd80a6a0d4d72ca74d9250cb2382b7c699c147e06017/charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", size = 159827, upload-time = "2026-04-02T09:27:48.053Z" },
{ url = "https://files.pythonhosted.org/packages/7a/1d/29d32e0fb40864b1f878c7f5a0b343ae676c6e2b271a2d55cc3a152391da/charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", size = 174168, upload-time = "2026-04-02T09:27:49.795Z" },
{ url = "https://files.pythonhosted.org/packages/de/32/d92444ad05c7a6e41fb2036749777c163baf7a0301a040cb672d6b2b1ae9/charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", size = 153018, upload-time = "2026-04-02T09:27:51.116Z" },
{ url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" },
]
[[package]]
name = "click"
version = "8.3.2"

@@ -490,21 +408,22 @@ wheels = [
[[package]]
name = "huggingface-hub"
-version = "0.36.2"
+version = "1.11.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "filelock" },
{ name = "fsspec" },
-{ name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
+{ name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
{ name = "httpx" },
{ name = "packaging" },
{ name = "pyyaml" },
{ name = "requests" },
{ name = "tqdm" },
{ name = "typer" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/89/e7aa12d8a6b9259bed10671abb25ae6fa437c0f88a86ecbf59617bae7759/huggingface_hub-1.11.0.tar.gz", hash = "sha256:15fb3713c7f9cdff7b808a94fd91664f661ab142796bb48c9cd9493e8d166278", size = 761749, upload-time = "2026-04-16T13:07:39.73Z" }
wheels = [
-{ url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" },
+{ url = "https://files.pythonhosted.org/packages/37/02/4f3f8997d1ea7fe0146b343e5e14bd065fa87af790d07e5576d31b31cc18/huggingface_hub-1.11.0-py3-none-any.whl", hash = "sha256:42a6de0afbfeb5e022222d36398f029679db4eb4778801aafda32257ae9131ab", size = 645499, upload-time = "2026-04-16T13:07:37.716Z" },
]

[[package]]

@@ -530,19 +449,16 @@ name = "infoxtractor"
version = "0.1.0"
source = { editable = "." }
dependencies = [
{ name = "aiofiles" },
{ name = "alembic" },
{ name = "asyncpg" },
{ name = "fastapi" },
{ name = "httpx" },
{ name = "jinja2" },
{ name = "pillow" },
{ name = "pydantic" },
{ name = "pydantic-settings" },
{ name = "pymupdf" },
{ name = "python-dateutil" },
{ name = "python-magic" },
{ name = "python-multipart" },
{ name = "sqlalchemy", extra = ["asyncio"] },
{ name = "uvicorn", extra = ["standard"] },
]

@@ -562,12 +478,10 @@ ocr = [
[package.metadata]
requires-dist = [
{ name = "aiofiles", specifier = ">=24.1" },
{ name = "alembic", specifier = ">=1.14" },
{ name = "asyncpg", specifier = ">=0.30" },
{ name = "fastapi", specifier = ">=0.115" },
{ name = "httpx", specifier = ">=0.27" },
{ name = "jinja2", specifier = ">=3.1" },
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.13" },
{ name = "pillow", specifier = ">=10.2,<11.0" },
{ name = "pydantic", specifier = ">=2.9" },

@@ -578,11 +492,10 @@ requires-dist = [
{ name = "pytest-httpx", marker = "extra == 'dev'", specifier = ">=0.32" },
{ name = "python-dateutil", specifier = ">=2.9" },
{ name = "python-magic", specifier = ">=0.4.27" },
{ name = "python-multipart", specifier = ">=0.0.12" },
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8" },
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.36" },
-{ name = "surya-ocr", marker = "extra == 'ocr'", specifier = ">=0.17,<0.18" },
+{ name = "surya-ocr", marker = "extra == 'ocr'", specifier = ">=0.9" },
-{ name = "torch", marker = "extra == 'ocr'", specifier = ">=2.7" },
+{ name = "torch", marker = "extra == 'ocr'", specifier = ">=2.4" },
{ name = "uvicorn", extras = ["standard"], specifier = ">=0.32" },
]
provides-extras = ["ocr", "dev"]

@@ -680,6 +593,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/68/a5/19d7aaa7e433713ffe881df33705925a196afb9532efc8475d26593921a6/mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77", size = 78503, upload-time = "2026-04-14T20:19:53.233Z" },
]
[[package]]
name = "markdown-it-py"
version = "4.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "mdurl" },
]
sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
]
[[package]]
name = "markupsafe"
version = "3.0.3"

@@ -743,6 +668,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
]
+[[package]]
+name = "mdurl"
+version = "0.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
+]
[[package]]
name = "mpmath"
version = "1.3.0"
@@ -1365,15 +1299,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840, upload-time = "2022-06-07T20:16:57.763Z" },
]
-[[package]]
-name = "python-multipart"
-version = "0.0.26"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/88/71/b145a380824a960ebd60e1014256dbb7d2253f2316ff2d73dfd8928ec2c3/python_multipart-0.0.26.tar.gz", hash = "sha256:08fadc45918cd615e26846437f50c5d6d23304da32c341f289a617127b081f17", size = 43501, upload-time = "2026-04-10T14:09:59.473Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847, upload-time = "2026-04-10T14:09:58.131Z" },
-]
[[package]]
name = "pyyaml"
version = "6.0.3"
@@ -1509,18 +1434,16 @@ wheels = [
]

[[package]]
-name = "requests"
+name = "rich"
-version = "2.33.1"
+version = "15.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
-    { name = "certifi" },
+    { name = "markdown-it-py" },
-    { name = "charset-normalizer" },
+    { name = "pygments" },
-    { name = "idna" },
-    { name = "urllib3" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/5f/a4/98b9c7c6428a668bf7e42ebb7c79d576a1c3c1e3ae2d47e674b468388871/requests-2.33.1.tar.gz", hash = "sha256:18817f8c57c6263968bc123d237e3b8b08ac046f5456bd1e307ee8f4250d3517", size = 134120, upload-time = "2026-03-30T16:09:15.531Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" }
wheels = [
-    { url = "https://files.pythonhosted.org/packages/d7/8e/7540e8a2036f79a125c1d2ebadf69ed7901608859186c856fa0388ef4197/requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a", size = 64947, upload-time = "2026-03-30T16:09:13.83Z" },
+    { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" },
]
[[package]] [[package]]
@@ -1579,6 +1502,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" },
]
+[[package]]
+name = "shellingham"
+version = "1.5.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
+]
[[package]]
name = "six"
version = "1.17.0"
@@ -1771,23 +1703,22 @@ wheels = [
[[package]]
name = "transformers"
-version = "4.57.6"
+version = "5.5.4"
source = { registry = "https://pypi.org/simple" }
dependencies = [
-    { name = "filelock" },
    { name = "huggingface-hub" },
    { name = "numpy" },
    { name = "packaging" },
    { name = "pyyaml" },
    { name = "regex" },
-    { name = "requests" },
    { name = "safetensors" },
    { name = "tokenizers" },
    { name = "tqdm" },
+    { name = "typer" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" }
wheels = [
-    { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" },
+    { url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" },
]
[[package]] [[package]]
@@ -1807,6 +1738,21 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" },
]
+[[package]]
+name = "typer"
+version = "0.24.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "annotated-doc" },
+    { name = "click" },
+    { name = "rich" },
+    { name = "shellingham" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" },
+]
[[package]]
name = "typing-extensions"
version = "4.15.0"
@@ -1828,15 +1774,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
]
-[[package]]
-name = "urllib3"
-version = "2.6.3"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
-]
[[package]]
name = "uvicorn"
version = "0.44.0"