32 changed files with 129 additions and 2374 deletions
--- a/.env.example
+++ b/.env.example
@@ -4,11 +4,11 @@
 # the Postgres password.

 # --- Job store -----------------------------------------------------------
-IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@127.0.0.1:5431/infoxtractor
+IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@host.docker.internal:5431/infoxtractor

 # --- LLM backend ---------------------------------------------------------
-IX_OLLAMA_URL=http://127.0.0.1:11434
-IX_DEFAULT_MODEL=qwen3:14b
+IX_OLLAMA_URL=http://host.docker.internal:11434
+IX_DEFAULT_MODEL=gpt-oss:20b

 # --- OCR -----------------------------------------------------------------
 IX_OCR_ENGINE=surya
--- a/.gitignore
+++ b/.gitignore
@@ -15,7 +15,6 @@ dist/
 build/
 *.log
 /tmp/
-.claude/
 # uv
 # uv.lock is committed intentionally for reproducible builds.

--- a/AGENTS.md
+++ b/AGENTS.md
@@ -4,7 +4,7 @@ Async, on-prem, LLM-powered structured information extraction microservice. Give

 Designed to be used by other on-prem services (e.g. mammon) as a reliable fallback / second opinion for format-specific deterministic parsers.

-Status: MVP deployed (2026-04-18) at `http://192.168.68.42:8994` — LAN only. Full reference spec at `docs/spec-core-pipeline.md`; MVP spec at `docs/superpowers/specs/2026-04-18-ix-mvp-design.md`; deploy runbook at `docs/deployment.md`.
+Status: design phase. Full reference spec at `docs/spec-core-pipeline.md`. MVP spec will live at `docs/superpowers/specs/`.

 ## Guiding Principles

@@ -25,7 +25,7 @@ Status: MVP deployed (2026-04-18) at `http://192.168.68.42:8994` — LAN only. F
 - **Language**: Python 3.12, asyncio
 - **Web/REST**: FastAPI + uvicorn
 - **OCR (pluggable)**: Surya OCR first (GPU, shares RTX 3090 with Ollama / Immich ML)
-- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `qwen3:14b`, configurable per use case
+- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `gpt-oss:20b`, configurable per use case
 - **State**: Postgres on the shared `postgis` container (:5431), new `infoxtractor` database
 - **Deployment**: Docker, `git push server main` → post-receive rebuild (pattern from other apps)

--- a/Dockerfile
+++ b/Dockerfile
@@ -1,69 +0,0 @@
-# InfoXtractor container image.
-#
-# Base image ships CUDA 12.4 runtime libraries so the Surya OCR client can
-# use the RTX 3090 on the deploy host. Ubuntu 22.04 is the LTS used across
-# the home-server stack (immich-ml, monitoring) so GPU drivers line up.
-FROM nvidia/cuda:12.4.0-runtime-ubuntu22.04
-
-ENV DEBIAN_FRONTEND=noninteractive \
-    PYTHONUNBUFFERED=1 \
-    PYTHONDONTWRITEBYTECODE=1 \
-    PIP_NO_CACHE_DIR=1 \
-    PIP_DISABLE_PIP_VERSION_CHECK=1
-
-# --- System deps --------------------------------------------------------
-#   - python3.12 via deadsnakes PPA (pinned; Ubuntu 22.04 ships 3.10 only)
-#   - libmagic1  : python-magic backend for MIME sniffing
-#   - libgl1     : libGL.so needed by Pillow/OpenCV wheels used by Surya
-#   - libglib2.0 : shared by Pillow/PyMuPDF headless rendering
-#   - curl       : post-receive hook's /healthz probe & general ops
-#   - ca-certs   : httpx TLS verification
-RUN apt-get update \
- && apt-get install -y --no-install-recommends \
-        software-properties-common \
-        ca-certificates \
-        curl \
-        gnupg \
- && add-apt-repository -y ppa:deadsnakes/ppa \
- && apt-get update \
- && apt-get install -y --no-install-recommends \
-        python3.12 \
-        python3.12-venv \
-        python3.12-dev \
-        libmagic1 \
-        libgl1 \
-        libglib2.0-0 \
- && ln -sf /usr/bin/python3.12 /usr/local/bin/python \
- && ln -sf /usr/bin/python3.12 /usr/local/bin/python3 \
- && apt-get clean \
- && rm -rf /var/lib/apt/lists/*
-
-# --- uv (dependency resolver used by the project) -----------------------
-# Install via the standalone installer; avoids needing a working system pip
-# (python3.12 on Ubuntu 22.04 has no `distutils`, which breaks Ubuntu pip).
-RUN curl -LsSf https://astral.sh/uv/install.sh | sh \
- && ln -sf /root/.local/bin/uv /usr/local/bin/uv
-
-WORKDIR /app
-
-# Copy dependency manifests + README early so the heavy `uv sync` layer
-# caches whenever only application code changes. README.md is required
-# because pyproject.toml names it as the package's readme — hatchling
-# validates it exists when resolving the editable install.
-COPY pyproject.toml uv.lock .python-version README.md ./
-
-# Prod + OCR extras, no dev tooling. --frozen means "must match uv.lock";
-# CI catches drift before it reaches the image.
-RUN uv sync --frozen --no-dev --extra ocr
-
-# --- Application code ---------------------------------------------------
-COPY src src
-COPY alembic alembic
-COPY alembic.ini ./
-
-EXPOSE 8994
-
-# Migrations are idempotent (alembic upgrade head is a no-op on a current
-# DB) so running them on every start keeps the image + DB aligned without
-# an extra orchestration step.
-CMD ["sh", "-c", "uv run alembic upgrade head && uv run uvicorn ix.app:create_app --factory --host 0.0.0.0 --port 8994"]
--- a/README.md
+++ b/README.md
@@ -4,12 +4,10 @@ Async, on-prem, LLM-powered structured information extraction microservice.

 Given a document (PDF, image, text) and a named *use case*, ix returns a structured JSON result whose shape matches the use-case schema — together with per-field provenance (OCR segment IDs, bounding boxes, cross-OCR agreement flags) that let the caller decide how much to trust each extracted value.

-**Status:** MVP deployed. Live on the home LAN at `http://192.168.68.42:8994`.
+**Status:** design phase. Implementation about to start.

 - Full reference spec: [`docs/spec-core-pipeline.md`](docs/spec-core-pipeline.md) (aspirational; MVP is a strict subset)
 - **MVP design:** [`docs/superpowers/specs/2026-04-18-ix-mvp-design.md`](docs/superpowers/specs/2026-04-18-ix-mvp-design.md)
-- **Implementation plan:** [`docs/superpowers/plans/2026-04-18-ix-mvp-implementation.md`](docs/superpowers/plans/2026-04-18-ix-mvp-implementation.md)
-- **Deployment runbook:** [`docs/deployment.md`](docs/deployment.md)
 - Agent / development notes: [`AGENTS.md`](AGENTS.md)

 ## Principles
@@ -17,44 +15,3 @@ Given a document (PDF, image, text) and a named *use case*, ix returns a structu
 - **On-prem always.** LLM = Ollama, OCR = local engines (Surya first). No OpenAI / Anthropic / Azure / AWS / cloud.
 - **Grounded extraction, not DB truth.** ix returns best-effort fields + provenance; the caller decides what to trust.
 - **Transport-agnostic pipeline core.** REST + Postgres-queue adapters in parallel on one job store.
-
-## Submitting a job
-
-```bash
-curl -X POST http://192.168.68.42:8994/jobs \
-  -H "Content-Type: application/json" \
-  -d '{
-    "use_case": "bank_statement_header",
-    "ix_client_id": "mammon",
-    "request_id": "some-correlation-id",
-    "context": {
-      "files": [{
-        "url": "http://paperless.local/api/documents/42/download/",
-        "headers": {"Authorization": "Token …"}
-      }],
-      "texts": ["<Paperless Tesseract OCR content>"]
-    }
-  }'
-# → {"job_id":"…","ix_id":"…","status":"pending"}
-```
-
-Poll `GET /jobs/{job_id}` until `status` is `done` or `error`. Optionally pass `callback_url` to receive a webhook on completion (one-shot, no retry; polling stays authoritative).
-
-Full REST surface + provenance response shape documented in the MVP design spec.
-
-## Running locally
-
-```bash
-uv sync --extra dev
-uv run pytest tests/unit -v                    # hermetic unit + integration suite
-IX_TEST_OLLAMA=1 uv run pytest tests/live -v    # needs LAN access to Ollama + GPU
-```
-
-## Deploying
-
-```bash
-git push server main      # rebuilds Docker image, restarts container, /healthz deploy gate
-python scripts/e2e_smoke.py   # E2E acceptance against the live service
-```
-
-See [`docs/deployment.md`](docs/deployment.md) for full runbook + rollback.
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,42 +0,0 @@
-# InfoXtractor Docker Compose stack.
-#
-# Single service. Uses host networking so the container can reach:
-#   - Ollama at 127.0.0.1:11434
-#   - postgis at 127.0.0.1:5431 (bound to loopback only; security hardening)
-# Both services are LAN-hardened on the host and never exposed publicly,
-# so host-network access stays on-prem. This matches the `goldstein`
-# container pattern on the same server.
-#
-# The GPU reservation block matches immich-ml / the shape Docker Compose
-# expects for GPU allocation on this host.
-
-name: infoxtractor
-
-services:
-  infoxtractor:
-    build: .
-    container_name: infoxtractor
-    network_mode: host
-    restart: always
-    env_file: .env
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: 1
-              capabilities: [gpu]
-    volumes:
-      # Persist Surya (datalab) + HuggingFace model caches so rebuilds don't
-      # re-download ~1.5 GB of weights every time.
-      - ix_surya_cache:/root/.cache/datalab
-      - ix_hf_cache:/root/.cache/huggingface
-    labels:
-      infrastructure.web_url: "http://192.168.68.42:8994"
-      backup.enable: "true"
-      backup.type: "postgres"
-      backup.name: "infoxtractor"
-
-volumes:
-  ix_surya_cache:
-  ix_hf_cache:
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -1,153 +0,0 @@
-# Deployment
-
-On-prem deploy to `192.168.68.42`. Push-to-deploy via a bare git repo + `post-receive` hook that rebuilds the Docker Compose stack. Pattern mirrors mammon and unified_messaging.
-
-## Topology
-
-```
-Mac (dev)
-  │  git push server main
-  ▼
-192.168.68.42:/home/server/Public/infoxtractor/repos.git   (bare)
-  │  post-receive → GIT_WORK_TREE=/…/app git checkout -f main
-  │                 docker compose up -d --build
-  │                 curl /healthz (60 s gate)
-  ▼
-Docker container `infoxtractor` (port 8994)
-  ├─ 127.0.0.1:11434  →  Ollama (qwen3:14b; host-network mode)
-  └─ 127.0.0.1:5431   →  postgis (database `infoxtractor`; host-network mode)
-```
-
-## One-time server setup
-
-Run **once** from the Mac. Idempotent.
-
-```bash
-export IX_POSTGRES_PASSWORD=<generate-a-strong-one>
-./scripts/setup_server.sh
-```
-
-The script:
-1. Creates `/home/server/Public/infoxtractor/repos.git` (bare) + `/home/server/Public/infoxtractor/app/` (worktree).
-2. Installs the `post-receive` hook (see `scripts/setup_server.sh` for the template).
-3. Creates the `infoxtractor` Postgres role + database on the shared `postgis` container.
-4. Writes `/home/server/Public/infoxtractor/app/.env` (mode 0600) from `.env.example` with the password substituted in.
-5. Verifies `qwen3:14b` is pulled in Ollama.
-6. Prints a hint to open UFW for port 8994 on the LAN subnet if it's missing.
-
-After the script finishes, add the deploy remote to the local repo:
-
-```bash
-git remote add server ssh://server@192.168.68.42/home/server/Public/infoxtractor/repos.git
-```
-
-## Normal deploy workflow
-
-```bash
-# after merging a feat branch into main
-git push server main
-
-# tail the server's deploy log
-ssh server@192.168.68.42 "tail -f /tmp/infoxtractor-deploy.log"
-
-# healthz gate (the post-receive hook also waits up to 60 s for this)
-curl http://192.168.68.42:8994/healthz
-
-# end-to-end smoke — this IS the real acceptance test
-python scripts/e2e_smoke.py
-```
-
-If the post-receive hook exits non-zero (healthz never reaches 200), the deploy is considered failed. The previous container keeps running (the hook swaps via `docker compose up -d --build`, which first builds the new image and only swaps if the build succeeds; if the new container fails `/healthz`, it's still up but broken). Investigate with `docker compose logs --tail 200` in `${APP_DIR}` and either fix forward or revert (see below).
-
-## Rollback
-
-Never force-push `main`. Rollbacks happen as **forward commits** via `git revert`:
-
-```bash
-git revert HEAD     # creates a revert commit for the last change
-git push forgejo main
-git push server main
-```
-
-## First deploy
-
-- **Date:** 2026-04-18
-- **Commit:** `fix/ollama-extract-json` (#36, the last of several Docker/ops follow-ups after PR #27 shipped the initial Dockerfile)
-- **`/healthz`:** all three probes (`postgres`, `ollama`, `ocr`) green. First-pass took ~7 min for the fresh container because Surya's recognition (1.34 GB) + detection (73 MB) models download from HuggingFace on first run; subsequent rebuilds reuse the named volumes declared in `docker-compose.yml` and come up in <30 s.
-- **E2E extraction:** `bank_statement_header` against `tests/fixtures/synthetic_giro.pdf` with Paperless-style texts:
-  - Pipeline completes in **35 s**.
-  - Extracted: `bank_name=DKB`, `account_iban=DE89370400440532013000`, `currency=EUR`, `opening_balance=1234.56`, `closing_balance=1450.22`, `statement_date=2026-03-31`, `statement_period_end=2026-03-31`, `statement_period_start=2026-03-01`, `account_type=null`.
-  - Provenance: 8 / 9 leaf fields have sources; 7 / 8 `provenance_verified` and `text_agreement` are True. `statement_period_start` shows up in the OCR but normalisation fails (dateutil picks a different interpretation of the cited day); to be chased in a follow-up.
-
-### Docker-ops follow-ups that landed during the first deploy
-
-All small, each merged as its own PR. In commit order after the scaffold (#27):
-
-- **#31** `fix(docker): uv via standalone installer` — Python 3.12 on Ubuntu 22.04 drops `distutils`; Ubuntu's pip needed it. Switched to the `uv` standalone installer, which has no pip dependency.
-- **#32** `fix(docker): include README.md in the uv sync COPY` — `hatchling` validates the readme file exists when resolving the editable project install.
-- **#33** `fix(compose): drop runtime: nvidia` — the deploy host's Docker daemon doesn't register a named `nvidia` runtime; `deploy.resources.devices` is sufficient and matches immich-ml.
-- **#34** `fix(deploy): network_mode: host` — `postgis` is bound to `127.0.0.1` on the host (security hardening T12). `host.docker.internal` points at the bridge gateway, not loopback, so the container couldn't reach postgis. Goldstein uses the same pattern.
-- **#35** `fix(deps): pin surya-ocr ^0.17` — earlier cu124 torch pin had forced surya to 0.14.1, which breaks our `surya.foundation` import and needs a transformers version that lacks `QuantizedCacheConfig`.
-- **#36** `fix(genai): drop Ollama format flag; extract trailing JSON` — Ollama 0.11.8 segfaults on Pydantic JSON Schemas (`$ref`, `anyOf`, `pattern`), and `format="json"` terminates reasoning models (qwen3) at `{}` because their `<think>…</think>` chain-of-thought isn't valid JSON. Omit the flag, inject the schema into the system prompt, extract the outermost `{…}` balanced block from the response.
-- **volumes** — named `ix_surya_cache` + `ix_hf_cache` mount `/root/.cache/datalab` + `/root/.cache/huggingface` so rebuilds don't re-download ~1.5 GB of model weights.
-
-Production notes:
-
-- `IX_DEFAULT_MODEL=qwen3:14b` (already pulled on the host). Spec listed `gpt-oss:20b` as a concrete example; swapped to keep the deploy on-prem without an extra `ollama pull`.
-- Torch 2.11 default cu13 wheels fall back to CPU against the host's CUDA 12.4 driver — Surya runs on CPU. Expected inference times: seconds per page. Upgrading the NVIDIA driver (or pinning a cu12-compatible torch wheel newer than 2.7) will unlock GPU with no code changes.
-
-## E2E smoke test (`scripts/e2e_smoke.py`)
-
-What it does (from the Mac):
-
-1. Checks `/healthz`.
-2. Starts a tiny HTTP server on the Mac's LAN IP serving `tests/fixtures/synthetic_giro.pdf`.
-3. Submits a `POST /jobs` with `use_case=bank_statement_header`, the fixture URL in `context.files`, and a Paperless-style OCR text in `context.texts` (to exercise the `text_agreement` cross-check).
-4. Polls `GET /jobs/{id}` every 2 s until terminal or 120 s timeout.
-5. Asserts: `status=="done"`, `bank_name` non-empty, `provenance.fields["result.closing_balance"].provenance_verified=True`, `text_agreement=True`, total elapsed `< 60s`.
-
-Non-zero exit means the deploy is not healthy. Roll back via `git revert HEAD`.
-
-## Operational checklists
-
-### After `ollama pull` on the host
-
-The `IX_DEFAULT_MODEL` env var on the server's `.env` must match something in `ollama list`. Changing the default means:
-
-1. Edit `/home/server/Public/infoxtractor/app/.env` → `IX_DEFAULT_MODEL=<new>`.
-2. `docker compose --project-directory /home/server/Public/infoxtractor/app restart`.
-3. `curl http://192.168.68.42:8994/healthz` → confirm `ollama: ok`.
-
-### If `/healthz` shows `ollama: degraded`
-
-`qwen3:14b` (or the configured default) is not pulled. On the host:
-```bash
-ssh server@192.168.68.42 "docker exec ollama ollama pull qwen3:14b"
-```
-
-### If `/healthz` shows `ocr: fail`
-
-Surya couldn't initialize (model missing, CUDA unavailable, OOM). First run can be slow — models download on first call. Check container logs:
-```bash
-ssh server@192.168.68.42 "docker logs infoxtractor --tail 200"
-```
-
-### If the container fails to start
-
-```bash
-ssh server@192.168.68.42 "tail -100 /tmp/infoxtractor-deploy.log"
-ssh server@192.168.68.42 "docker compose -f /home/server/Public/infoxtractor/app/docker-compose.yml logs --tail 200"
-```
-
-## Monitoring
-
-- Monitoring dashboard auto-discovers via the `infrastructure.web_url` label on the container: `http://192.168.68.42:8001` → "infoxtractor" card.
-- Backup opt-in via `backup.enable=true` + `backup.type=postgres` + `backup.name=infoxtractor` labels. The daily backup script picks up the `infoxtractor` Postgres database automatically.
-
-## Ports
-
-| Port | Direction | Source | Service |
-|------|-----------|--------|---------|
-| 8994/tcp | ALLOW | 192.168.68.0/24 | ix REST + healthz (LAN only; not publicly exposed) |
-
-No VPS Caddy entry; no `infrastructure.docs_url` label — this is an internal service.
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,8 +1,6 @@
 [project]
 name = "infoxtractor"
 version = "0.1.0"
-# Released 2026-04-18 with the first live deploy of the MVP. See
-# docs/deployment.md §"First deploy" for the commit + /healthz times.
 description = "Async on-prem LLM-powered structured information extraction microservice"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -33,12 +31,10 @@ dependencies = [

 [project.optional-dependencies]
 ocr = [
-    # Real OCR engine. Kept optional so CI (no GPU) can install the base
-    # package without the model deps.
-    # surya >= 0.17 is required: the client code uses the
-    # `surya.foundation` module, which older releases don't expose.
-    "surya-ocr>=0.17,<0.18",
-    "torch>=2.7",
+    # Real OCR engine — pulls torch + CUDA wheels. Kept optional so CI
+    # (no GPU) can install the base package without the model deps.
+    "surya-ocr>=0.9",
+    "torch>=2.4",
 ]
 dev = [
    "pytest>=8.3",
@@ -48,11 +44,6 @@ dev = [
    "mypy>=1.13",
 ]

-# Note: the default pypi torch ships cu13 wheels, which emit a
-# UserWarning and fall back to CPU against the deploy host's CUDA 12.4
-# driver. Surya then runs on CPU — slower but correct for MVP. A future
-# driver upgrade unlocks GPU Surya with no code changes.
-
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"
--- a/scripts/e2e_smoke.py
+++ b/scripts/e2e_smoke.py
@@ -1,210 +0,0 @@
-"""End-to-end smoke test against the deployed infoxtractor service.
-
-Uploads a synthetic bank-statement fixture, polls for completion, and asserts
-the provenance flags per spec §12 E2E. Intended to run from the Mac after
-every `git push server main` as the deploy gate.
-
-Prerequisites:
-  - The service is running and reachable at --base-url (default
-    http://192.168.68.42:8994).
-  - The fixture `tests/fixtures/synthetic_giro.pdf` is present.
-  - The Mac and the server are on the same LAN (the server must be able to
-    reach the Mac to download the fixture).
-
-Exit codes:
-  0  all assertions passed within the timeout
-  1  at least one assertion failed
-  2  the job never reached a terminal state in time
-  3  the service was unreachable or returned an unexpected error
-
-Usage:
-  python scripts/e2e_smoke.py
-  python scripts/e2e_smoke.py --base-url http://localhost:8994
-"""
-
-from __future__ import annotations
-
-import argparse
-import http.server
-import json
-import socket
-import socketserver
-import sys
-import threading
-import time
-import urllib.error
-import urllib.request
-import uuid
-from pathlib import Path
-
-DEFAULT_BASE_URL = "http://192.168.68.42:8994"
-FIXTURE = Path(__file__).parent.parent / "tests" / "fixtures" / "synthetic_giro.pdf"
-TIMEOUT_SECONDS = 120
-POLL_INTERVAL_SECONDS = 2
-
-
-def find_lan_ip() -> str:
-    """Return the Mac's LAN IP that the server can reach."""
-    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-    try:
-        # 192.168.68.42 is the server; getting the default route towards it
-        # yields the NIC with the matching subnet.
-        s.connect(("192.168.68.42", 80))
-        return s.getsockname()[0]
-    finally:
-        s.close()
-
-
-def serve_fixture_in_background(fixture: Path) -> tuple[str, threading.Event]:
-    """Serve the fixture on a temporary HTTP server; return the URL and a stop event."""
-    if not fixture.exists():
-        print(f"FIXTURE MISSING: {fixture}", file=sys.stderr)
-        sys.exit(3)
-
-    directory = fixture.parent
-    filename = fixture.name
-    lan_ip = find_lan_ip()
-
-    class Handler(http.server.SimpleHTTPRequestHandler):
-        def __init__(self, *args, **kwargs):
-            super().__init__(*args, directory=str(directory), **kwargs)
-
-        def log_message(self, format: str, *args) -> None:  # quiet
-            pass
-
-    # Pick any free port.
-    httpd = socketserver.TCPServer((lan_ip, 0), Handler)
-    port = httpd.server_address[1]
-    url = f"http://{lan_ip}:{port}/{filename}"
-    stop = threading.Event()
-
-    def _serve():
-        try:
-            while not stop.is_set():
-                httpd.handle_request()
-        finally:
-            httpd.server_close()
-
-    # Run in a thread. Use a loose timeout so handle_request returns when stop is set.
-    httpd.timeout = 0.5
-    t = threading.Thread(target=_serve, daemon=True)
-    t.start()
-    return url, stop
-
-
-def post_job(base_url: str, file_url: str, client_id: str, request_id: str) -> dict:
-    # Include a Paperless-style OCR of the fixture as context.texts so the
-    # text_agreement cross-check has something to compare against.
-    paperless_text = (
-        "DKB\n"
-        "DE89370400440532013000\n"
-        "Statement period: 01.03.2026 - 31.03.2026\n"
-        "Opening balance: 1234.56 EUR\n"
-        "Closing balance: 1450.22 EUR\n"
-        "31.03.2026\n"
-    )
-    payload = {
-        "use_case": "bank_statement_header",
-        "ix_client_id": client_id,
-        "request_id": request_id,
-        "context": {
-            "files": [file_url],
-            "texts": [paperless_text],
-        },
-    }
-    req = urllib.request.Request(
-        f"{base_url}/jobs",
-        data=json.dumps(payload).encode("utf-8"),
-        headers={"Content-Type": "application/json"},
-        method="POST",
-    )
-    with urllib.request.urlopen(req, timeout=10) as resp:
-        return json.loads(resp.read().decode("utf-8"))
-
-
-def get_job(base_url: str, job_id: str) -> dict:
-    req = urllib.request.Request(f"{base_url}/jobs/{job_id}")
-    with urllib.request.urlopen(req, timeout=10) as resp:
-        return json.loads(resp.read().decode("utf-8"))
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument("--base-url", default=DEFAULT_BASE_URL)
-    parser.add_argument("--timeout", type=int, default=TIMEOUT_SECONDS)
-    args = parser.parse_args()
-
-    # Sanity-check the service is up.
-    try:
-        with urllib.request.urlopen(f"{args.base_url}/healthz", timeout=5) as resp:
-            health = json.loads(resp.read().decode("utf-8"))
-            print(f"healthz: {health}")
-    except urllib.error.URLError as e:
-        print(f"service unreachable: {e}", file=sys.stderr)
-        return 3
-
-    fixture_url, stop_server = serve_fixture_in_background(FIXTURE)
-    print(f"serving fixture at {fixture_url}")
-
-    try:
-        client_id = "e2e_smoke"
-        request_id = f"smoke-{uuid.uuid4().hex[:8]}"
-        submit = post_job(args.base_url, fixture_url, client_id, request_id)
-        job_id = submit["job_id"]
-        print(f"submitted job_id={job_id}")
-
-        started = time.monotonic()
-        last_status = None
-        job = None
-        while time.monotonic() - started < args.timeout:
-            job = get_job(args.base_url, job_id)
-            if job["status"] != last_status:
-                print(f"[{time.monotonic() - started:5.1f}s] status={job['status']}")
-                last_status = job["status"]
-            if job["status"] in ("done", "error"):
-                break
-            time.sleep(POLL_INTERVAL_SECONDS)
-        else:
-            print(f"FAIL: timed out after {args.timeout}s", file=sys.stderr)
-            return 2
-
-        assert job is not None
-        failed = []
-
-        if job["status"] != "done":
-            failed.append(f"status={job['status']!r} (want 'done')")
-
-        response = job.get("response") or {}
-        if response.get("error"):
-            failed.append(f"response.error={response['error']!r}")
-
-        result = (response.get("ix_result") or {}).get("result") or {}
-        bank = result.get("bank_name")
-        if not isinstance(bank, str) or not bank.strip():
-            failed.append(f"bank_name={bank!r} (want non-empty string)")
-
-        fields = (response.get("provenance") or {}).get("fields") or {}
-        closing = fields.get("result.closing_balance") or {}
-        if not closing.get("provenance_verified"):
-            failed.append(f"closing_balance.provenance_verified={closing.get('provenance_verified')!r}")
-        if closing.get("text_agreement") is not True:
-            failed.append(f"closing_balance.text_agreement={closing.get('text_agreement')!r} (Paperless-style text submitted)")
-
-        elapsed = time.monotonic() - started
-        if elapsed >= 60:
-            failed.append(f"elapsed={elapsed:.1f}s (≥ 60s; slow path)")
-
-        print(json.dumps(result, indent=2, default=str))
-
-        if failed:
-            print("\n".join(f"FAIL: {f}" for f in failed), file=sys.stderr)
-            return 1
-
-        print(f"\nPASS in {elapsed:.1f}s")
-        return 0
-    finally:
-        stop_server.set()
-
-
-if __name__ == "__main__":
-    sys.exit(main())
--- a/scripts/setup_server.sh
+++ b/scripts/setup_server.sh
@@ -1,127 +0,0 @@
-#!/usr/bin/env bash
-# One-shot server setup for InfoXtractor. Idempotent: safe to re-run.
-#
-# Run from the Mac:
-#   IX_POSTGRES_PASSWORD=<pw> ./scripts/setup_server.sh
-#
-# What it does on 192.168.68.42:
-#   1. Creates the bare git repo `/home/server/Public/infoxtractor/repos.git` if missing.
-#   2. Writes the post-receive hook (or updates it) and makes it executable.
-#   3. Creates the Postgres role + database on the shared `postgis` container.
-#   4. Writes `/home/server/Public/infoxtractor/app/.env` (0600) from .env.example.
-#   5. Verifies `qwen3:14b` is pulled in Ollama.
-
-set -euo pipefail
-
-SERVER="${IX_SERVER:-server@192.168.68.42}"
-APP_BASE="/home/server/Public/infoxtractor"
-REPOS_GIT="${APP_BASE}/repos.git"
-APP_DIR="${APP_BASE}/app"
-DB_NAME="infoxtractor"
-DB_USER="infoxtractor"
-
-if [ -z "${IX_POSTGRES_PASSWORD:-}" ]; then
-  read -r -s -p "Postgres password for role '${DB_USER}': " IX_POSTGRES_PASSWORD
-  echo
-fi
-
-if [ -z "${IX_POSTGRES_PASSWORD}" ]; then
-  echo "IX_POSTGRES_PASSWORD is required." >&2
-  exit 1
-fi
-
-echo "==> 1/5  Ensuring bare repo + post-receive hook on ${SERVER}"
-ssh "${SERVER}" bash -s <<EOF
-set -euo pipefail
-mkdir -p "${REPOS_GIT}" "${APP_DIR}"
-if [ ! -f "${REPOS_GIT}/HEAD" ]; then
-  git init --bare "${REPOS_GIT}"
-fi
-
-cat >"${REPOS_GIT}/hooks/post-receive" <<'HOOK'
-#!/usr/bin/env bash
-set -eo pipefail
-
-APP_DIR="${APP_DIR}"
-LOG="/tmp/infoxtractor-deploy.log"
-
-echo "[\$(date -u '+%FT%TZ')] post-receive start" >> "\$LOG"
-
-mkdir -p "\$APP_DIR"
-GIT_WORK_TREE="\$APP_DIR" git --git-dir="${REPOS_GIT}" checkout -f main >> "\$LOG" 2>&1
-
-cd "\$APP_DIR"
-docker compose up -d --build >> "\$LOG" 2>&1
-
-# Deploy gate: /healthz must return 200 within 60 s.
-for i in \$(seq 1 30); do
-  if curl -fsS http://localhost:8994/healthz > /dev/null 2>&1; then
-    echo "[\$(date -u '+%FT%TZ')] healthz OK" >> "\$LOG"
-    exit 0
-  fi
-  sleep 2
-done
-
-echo "[\$(date -u '+%FT%TZ')] healthz never reached OK" >> "\$LOG"
-docker compose logs --tail 100 >> "\$LOG" 2>&1 || true
-exit 1
-HOOK
-
-chmod +x "${REPOS_GIT}/hooks/post-receive"
-EOF
-
-echo "==> 2/5  Verifying Ollama has qwen3:14b pulled"
-if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'qwen3:14b'"; then
-  echo "FAIL: qwen3:14b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull qwen3:14b'" >&2
-  exit 1
-fi
-
-echo "==> 3/5  Creating Postgres role '${DB_USER}' and database '${DB_NAME}' on postgis container"
-# Idempotent via DO blocks; uses docker exec to avoid needing psql on the host.
-ssh "${SERVER}" bash -s <<EOF
-set -euo pipefail
-docker exec -i postgis psql -U postgres <<SQL
-DO \\\$\\\$
-BEGIN
-  IF NOT EXISTS (SELECT FROM pg_roles WHERE rolname = '${DB_USER}') THEN
-    CREATE ROLE ${DB_USER} LOGIN PASSWORD '${IX_POSTGRES_PASSWORD}';
-  ELSE
-    ALTER ROLE ${DB_USER} WITH PASSWORD '${IX_POSTGRES_PASSWORD}';
-  END IF;
-END
-\\\$\\\$;
-SQL
-
-if ! docker exec -i postgis psql -U postgres -tc "SELECT 1 FROM pg_database WHERE datname = '${DB_NAME}'" | grep -q 1; then
-  docker exec -i postgis createdb -U postgres -O ${DB_USER} ${DB_NAME}
-fi
-EOF
-
-echo "==> 4/5  Writing ${APP_DIR}/.env on the server"
-# Render .env from the repo's .env.example, substituting the password placeholder.
-LOCAL_ENV_CONTENT="$(
-  sed "s#<password>#${IX_POSTGRES_PASSWORD}#g" \
-      "$(dirname "$0")/../.env.example"
-)"
-# Append the IX_TEST_MODE=production for safety (fake mode stays off).
-# .env is written atomically and permissioned 0600.
-ssh "${SERVER}" "install -d -m 0755 '${APP_DIR}' && cat > '${APP_DIR}/.env' <<'ENVEOF'
-${LOCAL_ENV_CONTENT}
-ENVEOF
-chmod 0600 '${APP_DIR}/.env'"
-
-echo "==> 5/5  Checking UFW rule for port 8994 (LAN only)"
-ssh "${SERVER}" "sudo ufw status numbered | grep -F 8994" >/dev/null 2>&1 || {
-  echo "NOTE: UFW doesn't yet allow 8994. Run on the server:"
-  echo "  sudo ufw allow from 192.168.68.0/24 to any port 8994 proto tcp"
-}
-
-echo
-echo "Done."
-echo
-echo "Next steps (on the Mac):"
-echo "  git remote add server ssh://server@192.168.68.42${REPOS_GIT}"
-echo "  git push server main"
-echo "  ssh ${SERVER} 'tail -f /tmp/infoxtractor-deploy.log'"
-echo "  curl http://192.168.68.42:8994/healthz"
-echo "  python scripts/e2e_smoke.py"
--- a/src/ix/app.py
+++ b/src/ix/app.py
@@ -5,132 +5,21 @@ worker loop (Task 3.5) and the pg_queue listener (Task 3.6). Tests that
 don't care about the worker call ``create_app(spawn_worker=False)`` so the
 lifespan returns cleanly.

-Task 4.3 fills in the production wiring:
-
-* Factories (``make_genai_client`` / ``make_ocr_client``) pick between
-  fakes (``IX_TEST_MODE=fake``) and real Ollama/Surya clients.
-* ``/healthz`` probes call ``selfcheck()`` on the active clients. In
-  ``fake`` mode they always report ok.
-* The worker's :class:`Pipeline` is built once per spawn with the real
-  chain of Steps; each call to the injected ``pipeline_factory`` returns
-  a fresh Pipeline so per-request state stays isolated.
+The factory is parameterised (``spawn_worker``) instead of env-gated because
+pytest runs multiple app instances per session and we want the decision local
+to each call, not inferred from ``IX_*`` variables. The listener is also
+gated on ``spawn_worker`` — the listener is only useful when a worker is
+draining the queue, so the two share one flag.
 """

 from __future__ import annotations

-import asyncio
-from collections.abc import AsyncIterator, Callable
+from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager, suppress
-from typing import Literal

 from fastapi import FastAPI

-from ix.adapters.rest.routes import Probes, get_probes
 from ix.adapters.rest.routes import router as rest_router
-from ix.config import AppConfig, get_config
-from ix.genai import make_genai_client
-from ix.genai.client import GenAIClient
-from ix.ocr import make_ocr_client
-from ix.ocr.client import OCRClient
-from ix.pipeline.genai_step import GenAIStep
-from ix.pipeline.ocr_step import OCRStep
-from ix.pipeline.pipeline import Pipeline
-from ix.pipeline.reliability_step import ReliabilityStep
-from ix.pipeline.response_handler_step import ResponseHandlerStep
-from ix.pipeline.setup_step import SetupStep
-
-
-def build_pipeline(
-    genai: GenAIClient, ocr: OCRClient, cfg: AppConfig
-) -> Pipeline:
-    """Assemble the production :class:`Pipeline` with injected clients.
-
-    Kept as a module-level helper so tests that want to exercise the
-    production wiring (without running the worker) can call it directly.
-    """
-
-    from pathlib import Path
-
-    from ix.ingestion import FetchConfig
-
-    return Pipeline(
-        steps=[
-            SetupStep(
-                tmp_dir=Path(cfg.tmp_dir),
-                fetch_config=FetchConfig(
-                    connect_timeout_s=float(cfg.file_connect_timeout_seconds),
-                    read_timeout_s=float(cfg.file_read_timeout_seconds),
-                    max_bytes=cfg.file_max_bytes,
-                ),
-            ),
-            OCRStep(ocr_client=ocr),
-            GenAIStep(genai_client=genai),
-            ReliabilityStep(),
-            ResponseHandlerStep(),
-        ]
-    )
-
-
-def _make_ollama_probe(
-    genai: GenAIClient, cfg: AppConfig
-) -> Callable[[], Literal["ok", "degraded", "fail"]]:
-    """Adapter: async ``selfcheck`` → sync callable the route expects.
-
-    Always drives the coroutine on a throwaway event loop in a separate
-    thread. This keeps the behavior identical whether the caller holds an
-    event loop (FastAPI request) or doesn't (a CLI tool), and avoids the
-    ``asyncio.run`` vs. already-running-loop footgun.
-    """
-
-    def probe() -> Literal["ok", "degraded", "fail"]:
-        if not hasattr(genai, "selfcheck"):
-            return "ok"  # fake client — nothing to probe.
-        return _run_async_sync(
-            lambda: genai.selfcheck(expected_model=cfg.default_model),  # type: ignore[attr-defined]
-            fallback="fail",
-        )
-
-    return probe
-
-
-def _make_ocr_probe(ocr: OCRClient) -> Callable[[], Literal["ok", "fail"]]:
-    def probe() -> Literal["ok", "fail"]:
-        if not hasattr(ocr, "selfcheck"):
-            return "ok"  # fake — nothing to probe.
-        return _run_async_sync(
-            lambda: ocr.selfcheck(),  # type: ignore[attr-defined]
-            fallback="fail",
-        )
-
-    return probe
-
-
-def _run_async_sync(make_coro, *, fallback: str) -> str:  # type: ignore[no-untyped-def]
-    """Run ``make_coro()`` on a fresh loop in a thread; return its result.
-
-    The thread owns its own event loop so the caller's loop (if any) keeps
-    running. Any exception collapses to ``fallback``.
-    """
-
-    import threading
-
-    result: dict[str, object] = {}
-
-    def _runner() -> None:
-        loop = asyncio.new_event_loop()
-        try:
-            result["value"] = loop.run_until_complete(make_coro())
-        except Exception as exc:  # any error collapses to fallback
-            result["error"] = exc
-        finally:
-            loop.close()
-
-    t = threading.Thread(target=_runner)
-    t.start()
-    t.join()
-    if "error" in result or "value" not in result:
-        return fallback
-    return str(result["value"])


 def create_app(*, spawn_worker: bool = True) -> FastAPI:
@@ -147,35 +36,21 @@ def create_app(*, spawn_worker: bool = True) -> FastAPI:

    @asynccontextmanager
    async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
-        cfg = get_config()
-
-        # Build the clients once per process. The worker's pipeline
-        # factory closes over these so every job runs through the same
-        # Ollama/Surya instance (Surya's predictors are heavy; re-loading
-        # them per job would be catastrophic).
-        genai_client = make_genai_client(cfg)
-        ocr_client = make_ocr_client(cfg)
-
-        # Override the route-level probe DI so /healthz reflects the
-        # actual clients. Tests that want canned probes can still override
-        # ``get_probes`` at the TestClient layer.
-        _app.dependency_overrides.setdefault(
-            get_probes,
-            lambda: Probes(
-                ollama=_make_ollama_probe(genai_client, cfg),
-                ocr=_make_ocr_probe(ocr_client),
-            ),
-        )
-
        worker_task = None
        listener = None
        if spawn_worker:
+            # Pipeline factory + listener wiring live in Chunk 4's
+            # production entrypoint; keeping this path best-effort lets the
+            # lifespan still start even on a box where Ollama/Surya aren't
+            # available (without the listener we just get a passive 10 s poll).
            try:
                from ix.adapters.pg_queue.listener import (
                    PgQueueListener,
                    asyncpg_dsn_from_sqlalchemy_url,
                )
+                from ix.config import get_config

+                cfg = get_config()
                listener = PgQueueListener(
                    dsn=asyncpg_dsn_from_sqlalchemy_url(cfg.postgres_url)
                )
@@ -184,10 +59,10 @@ def create_app(*, spawn_worker: bool = True) -> FastAPI:
                listener = None

            try:
-                worker_task = await _spawn_production_worker(
-                    cfg, genai_client, ocr_client, listener
-                )
-            except Exception:
+                from ix.worker.loop import spawn_worker_task
+
+                worker_task = await spawn_worker_task(_app)
+            except ImportError:
                worker_task = None
        try:
            yield
@@ -203,30 +78,3 @@ def create_app(*, spawn_worker: bool = True) -> FastAPI:
    app = FastAPI(lifespan=lifespan, title="infoxtractor", version="0.1.0")
    app.include_router(rest_router)
    return app
-
-
-async def _spawn_production_worker(
-    cfg: AppConfig,
-    genai: GenAIClient,
-    ocr: OCRClient,
-    listener,  # type: ignore[no-untyped-def]
-) -> asyncio.Task[None]:
-    """Spawn the background worker with a production pipeline factory."""
-
-    from ix.store.engine import get_session_factory
-    from ix.worker.loop import Worker
-
-    def pipeline_factory() -> Pipeline:
-        return build_pipeline(genai, ocr, cfg)
-
-    worker = Worker(
-        session_factory=get_session_factory(),
-        pipeline_factory=pipeline_factory,
-        poll_interval_seconds=10.0,
-        max_running_seconds=2 * cfg.pipeline_request_timeout_seconds,
-        callback_timeout_seconds=cfg.callback_timeout_seconds,
-        wait_for_work=listener.wait_for_work if listener is not None else None,
-    )
-
-    stop = asyncio.Event()
-    return asyncio.create_task(worker.run(stop))
--- a/src/ix/config.py
+++ b/src/ix/config.py
@@ -12,7 +12,6 @@ re-read after ``monkeypatch.setenv``. Production code never clears the cache.
 from __future__ import annotations

 from functools import lru_cache
-from typing import Literal

 from pydantic_settings import BaseSettings, SettingsConfigDict

@@ -33,17 +32,14 @@ class AppConfig(BaseSettings):
    )

    # --- Job store ---
-    # Defaults assume the ix container runs with `network_mode: host` and
-    # reaches the shared `postgis` and `ollama` containers on loopback;
-    # spec §11 / docker-compose.yml ship that configuration.
    postgres_url: str = (
        "postgresql+asyncpg://infoxtractor:<password>"
-        "@127.0.0.1:5431/infoxtractor"
+        "@host.docker.internal:5431/infoxtractor"
    )

    # --- LLM backend ---
-    ollama_url: str = "http://127.0.0.1:11434"
-    default_model: str = "qwen3:14b"
+    ollama_url: str = "http://host.docker.internal:11434"
+    default_model: str = "gpt-oss:20b"

    # --- OCR ---
    ocr_engine: str = "surya"
@@ -66,13 +62,6 @@ class AppConfig(BaseSettings):
    # --- Observability ---
    log_level: str = "INFO"

-    # --- Test / wiring mode ---
-    # ``fake``: factories return FakeGenAIClient / FakeOCRClient and
-    # ``/healthz`` probes report ok. CI sets this so the Forgejo runner
-    # doesn't need access to Ollama or GPU-backed Surya. ``None`` (default)
-    # means production wiring: real OllamaClient + SuryaOCRClient.
-    test_mode: Literal["fake"] | None = None
-

 @lru_cache(maxsize=1)
 def get_config() -> AppConfig:
--- a/src/ix/genai/__init__.py
+++ b/src/ix/genai/__init__.py
@@ -1,43 +1,18 @@
 """GenAI subsystem: protocol + fake client + invocation-result dataclasses.

-Real backends (Ollama, …) plug in behind :class:`GenAIClient`. The factory
-:func:`make_genai_client` picks between :class:`FakeGenAIClient` (for CI
-/ hermetic tests via ``IX_TEST_MODE=fake``) and :class:`OllamaClient`
-(production). Tests that want a real Ollama client anyway can call the
-constructor directly.
+Real backends (Ollama, etc.) plug in behind :class:`GenAIClient`. The MVP
+ships only :class:`FakeGenAIClient` from this package; the real Ollama
+client lands in Chunk 4.
 """

 from __future__ import annotations

-from ix.config import AppConfig
 from ix.genai.client import GenAIClient, GenAIInvocationResult, GenAIUsage
 from ix.genai.fake import FakeGenAIClient
-from ix.genai.ollama_client import OllamaClient
-
-
-def make_genai_client(cfg: AppConfig) -> GenAIClient:
-    """Return the :class:`GenAIClient` configured for the current run.
-
-    When ``cfg.test_mode == "fake"`` the fake is returned; the pipeline
-    callers are expected to override the injected client via DI if they
-    want a non-default canned response. Otherwise a live
-    :class:`OllamaClient` bound to ``cfg.ollama_url`` and the per-call
-    timeout is returned.
-    """
-
-    if cfg.test_mode == "fake":
-        return FakeGenAIClient(parsed=None)
-    return OllamaClient(
-        base_url=cfg.ollama_url,
-        per_call_timeout_s=float(cfg.genai_call_timeout_seconds),
-    )
-

 __all__ = [
    "FakeGenAIClient",
    "GenAIClient",
    "GenAIInvocationResult",
    "GenAIUsage",
-    "OllamaClient",
-    "make_genai_client",
 ]
--- a/src/ix/genai/ollama_client.py
+++ b/src/ix/genai/ollama_client.py
@@ -1,340 +0,0 @@
-"""OllamaClient — real :class:`GenAIClient` implementation (spec §6 GenAIStep).
-
-Wraps the Ollama ``/api/chat`` structured-output endpoint. Per spec:
-
-* POST ``{base_url}/api/chat`` with ``format = <pydantic JSON schema>``,
-  ``stream = false``, and ``options`` carrying provider-neutral knobs
-  (``temperature`` mapped, ``reasoning_effort`` dropped — Ollama ignores it).
-* Messages are passed through. Content-parts lists (``[{"type":"text",...}]``)
-  are joined to a single string because MVP models (``gpt-oss:20b`` /
-  ``qwen2.5:32b``) don't accept native content-parts.
-* Per-call timeout is enforced via ``httpx``. A connection refusal, read
-  timeout, or 5xx maps to ``IX_002_000``. A 2xx whose ``message.content`` is
-  not valid JSON for the schema maps to ``IX_002_001``.
-
-``selfcheck()`` targets ``/api/tags`` with a fixed 5 s timeout and is what
-``/healthz`` consumes.
-"""
-
-from __future__ import annotations
-
-from typing import Any, Literal
-
-import httpx
-from pydantic import BaseModel, ValidationError
-
-from ix.errors import IXErrorCode, IXException
-from ix.genai.client import GenAIInvocationResult, GenAIUsage
-
-_OLLAMA_TAGS_TIMEOUT_S: float = 5.0
-_BODY_SNIPPET_MAX_CHARS: int = 240
-
-
-class OllamaClient:
-    """Async Ollama backend satisfying :class:`~ix.genai.client.GenAIClient`.
-
-    Parameters
-    ----------
-    base_url:
-        Root URL of the Ollama server (e.g. ``http://127.0.0.1:11434``).
-        Trailing slashes are stripped.
-    per_call_timeout_s:
-        Hard per-call timeout for ``/api/chat``. Spec default: 1500 s.
-    """
-
-    def __init__(self, base_url: str, per_call_timeout_s: float) -> None:
-        self._base_url = base_url.rstrip("/")
-        self._per_call_timeout_s = per_call_timeout_s
-
-    async def invoke(
-        self,
-        request_kwargs: dict[str, Any],
-        response_schema: type[BaseModel],
-    ) -> GenAIInvocationResult:
-        """Run one structured-output chat call; parse into ``response_schema``."""
-
-        body = self._translate_request(request_kwargs, response_schema)
-        url = f"{self._base_url}/api/chat"
-
-        try:
-            async with httpx.AsyncClient(timeout=self._per_call_timeout_s) as http:
-                resp = await http.post(url, json=body)
-        except httpx.HTTPError as exc:
-            raise IXException(
-                IXErrorCode.IX_002_000,
-                detail=f"ollama {exc.__class__.__name__}: {exc}",
-            ) from exc
-        except (ConnectionError, TimeoutError) as exc:  # pragma: no cover - httpx wraps these
-            raise IXException(
-                IXErrorCode.IX_002_000,
-                detail=f"ollama {exc.__class__.__name__}: {exc}",
-            ) from exc
-
-        if resp.status_code >= 500:
-            raise IXException(
-                IXErrorCode.IX_002_000,
-                detail=(
-                    f"ollama HTTP {resp.status_code}: "
-                    f"{resp.text[:_BODY_SNIPPET_MAX_CHARS]}"
-                ),
-            )
-        if resp.status_code >= 400:
-            raise IXException(
-                IXErrorCode.IX_002_000,
-                detail=(
-                    f"ollama HTTP {resp.status_code}: "
-                    f"{resp.text[:_BODY_SNIPPET_MAX_CHARS]}"
-                ),
-            )
-
-        try:
-            payload = resp.json()
-        except ValueError as exc:
-            raise IXException(
-                IXErrorCode.IX_002_000,
-                detail=f"ollama non-JSON body: {resp.text[:_BODY_SNIPPET_MAX_CHARS]}",
-            ) from exc
-
-        content = (payload.get("message") or {}).get("content") or ""
-        json_blob = _extract_json_blob(content)
-        try:
-            parsed = response_schema.model_validate_json(json_blob)
-        except ValidationError as exc:
-            raise IXException(
-                IXErrorCode.IX_002_001,
-                detail=(
-                    f"{response_schema.__name__}: {exc.__class__.__name__}: "
-                    f"body={content[:_BODY_SNIPPET_MAX_CHARS]}"
-                ),
-            ) from exc
-        except ValueError as exc:
-            # ``model_validate_json`` raises ValueError on invalid JSON (not
-            # a ValidationError). Treat as structured-output failure.
-            raise IXException(
-                IXErrorCode.IX_002_001,
-                detail=(
-                    f"{response_schema.__name__}: invalid JSON: "
-                    f"body={content[:_BODY_SNIPPET_MAX_CHARS]}"
-                ),
-            ) from exc
-
-        usage = GenAIUsage(
-            prompt_tokens=int(payload.get("prompt_eval_count") or 0),
-            completion_tokens=int(payload.get("eval_count") or 0),
-        )
-        model_name = str(payload.get("model") or request_kwargs.get("model") or "")
-        return GenAIInvocationResult(parsed=parsed, usage=usage, model_name=model_name)
-
-    async def selfcheck(
-        self, expected_model: str
-    ) -> Literal["ok", "degraded", "fail"]:
-        """Probe ``/api/tags`` for ``/healthz``.
-
-        ``ok`` when the server answers 2xx and ``expected_model`` is listed;
-        ``degraded`` when reachable but the model is missing; ``fail``
-        otherwise. Spec §5, §11.
-        """
-
-        try:
-            async with httpx.AsyncClient(timeout=_OLLAMA_TAGS_TIMEOUT_S) as http:
-                resp = await http.get(f"{self._base_url}/api/tags")
-        except (httpx.HTTPError, ConnectionError, TimeoutError):
-            return "fail"
-
-        if resp.status_code != 200:
-            return "fail"
-
-        try:
-            payload = resp.json()
-        except ValueError:
-            return "fail"
-
-        models = payload.get("models") or []
-        names = {str(entry.get("name", "")) for entry in models}
-        if expected_model in names:
-            return "ok"
-        return "degraded"
-
-    def _translate_request(
-        self,
-        request_kwargs: dict[str, Any],
-        response_schema: type[BaseModel],
-    ) -> dict[str, Any]:
-        """Map provider-neutral kwargs to Ollama's /api/chat body.
-
-        Schema strategy for Ollama 0.11.8: we pass ``format="json"`` (loose
-        JSON mode) and bake the Pydantic schema into a system message
-        ahead of the caller's own system prompt. Rationale:
-
-        * The full Pydantic schema as ``format=<schema>`` crashes llama.cpp's
-          structured-output implementation (SIGSEGV) on every non-trivial
-          shape — ``anyOf`` / ``$ref`` / ``pattern`` all trigger it.
-        * ``format="json"`` alone guarantees valid JSON but not the shape;
-          models routinely return ``{}`` when not told what fields to emit.
-        * Injecting the schema into the prompt is the cheapest way to
-          get both: the model sees the expected shape explicitly, Pydantic
-          validates the response at parse time (IX_002_001 on mismatch).
-
-        Non-Ollama ``GenAIClient`` impls can ignore this behaviour and use
-        native structured-output (``response_format`` on OpenAI, etc.).
-        """
-
-        messages = self._translate_messages(
-            list(request_kwargs.get("messages") or [])
-        )
-        messages = _inject_schema_system_message(messages, response_schema)
-        body: dict[str, Any] = {
-            "model": request_kwargs.get("model"),
-            "messages": messages,
-            "stream": False,
-            # NOTE: format is deliberately omitted. `format="json"` made
-            # reasoning models (qwen3) abort after emitting `{}` because the
-            # constrained sampler terminated before the chain-of-thought
-            # finished; `format=<schema>` segfaulted Ollama 0.11.8. Letting
-            # the model stream freely and then extracting the trailing JSON
-            # blob works for both reasoning and non-reasoning models.
-        }
-
-        options: dict[str, Any] = {}
-        if "temperature" in request_kwargs:
-            options["temperature"] = request_kwargs["temperature"]
-        # reasoning_effort intentionally dropped — Ollama doesn't support it.
-        if options:
-            body["options"] = options
-        return body
-
-    @staticmethod
-    def _translate_messages(
-        messages: list[dict[str, Any]],
-    ) -> list[dict[str, Any]]:
-        """Collapse content-parts lists into single strings for Ollama."""
-        out: list[dict[str, Any]] = []
-        for msg in messages:
-            content = msg.get("content")
-            if isinstance(content, list):
-                text_parts = [
-                    str(part.get("text", ""))
-                    for part in content
-                    if isinstance(part, dict) and part.get("type") == "text"
-                ]
-                new_content = "\n".join(text_parts)
-            else:
-                new_content = content
-            out.append({**msg, "content": new_content})
-        return out
-
-
-def _extract_json_blob(text: str) -> str:
-    """Return the outermost balanced JSON object in ``text``.
-
-    Reasoning models (qwen3, deepseek-r1) wrap their real answer in
-    ``<think>…</think>`` blocks. Other models sometimes prefix prose or
-    fence the JSON in ```json``` code blocks. Finding the last balanced
-    ``{…}`` is the cheapest robust parse that works for all three shapes;
-    a malformed response yields the full text and Pydantic catches it
-    downstream as ``IX_002_001``.
-    """
-    start = text.find("{")
-    if start < 0:
-        return text
-    depth = 0
-    in_string = False
-    escaped = False
-    for i in range(start, len(text)):
-        ch = text[i]
-        if in_string:
-            if escaped:
-                escaped = False
-            elif ch == "\\":
-                escaped = True
-            elif ch == '"':
-                in_string = False
-            continue
-        if ch == '"':
-            in_string = True
-        elif ch == "{":
-            depth += 1
-        elif ch == "}":
-            depth -= 1
-            if depth == 0:
-                return text[start : i + 1]
-    return text[start:]
-
-
-def _inject_schema_system_message(
-    messages: list[dict[str, Any]],
-    response_schema: type[BaseModel],
-) -> list[dict[str, Any]]:
-    """Prepend a system message that pins the expected JSON shape.
-
-    Ollama's ``format="json"`` mode guarantees valid JSON but not the
-    field set or names. We emit the Pydantic schema as JSON and
-    instruct the model to match it. If the caller already provides a
-    system message, we prepend ours; otherwise ours becomes the first
-    system turn.
-    """
-    import json as _json
-
-    schema_json = _json.dumps(
-        _sanitise_schema_for_ollama(response_schema.model_json_schema()),
-        indent=2,
-    )
-    guidance = (
-        "Respond ONLY with a single JSON object matching this JSON Schema "
-        "exactly. No prose, no code fences, no explanations. All top-level "
-        "properties listed in `required` MUST be present. Use null for "
-        "fields you cannot confidently extract. The JSON Schema:\n"
-        f"{schema_json}"
-    )
-    return [{"role": "system", "content": guidance}, *messages]
-
-
-def _sanitise_schema_for_ollama(schema: Any) -> Any:
-    """Strip null branches from ``anyOf`` unions.
-
-    Ollama 0.11.8's llama.cpp structured-output implementation segfaults on
-    Pydantic v2's standard Optional pattern::
-
-        {"anyOf": [{"type": "string"}, {"type": "null"}]}
-
-    We collapse any ``anyOf`` that includes a ``{"type": "null"}`` entry to
-    its non-null branch — single branch becomes that branch inline; multiple
-    branches keep the union without null. This only narrows what the LLM is
-    *told* it may emit; Pydantic still validates the real response and can
-    accept ``None`` at parse time if the field is ``Optional``.
-
-    Walk is recursive and structure-preserving. Other ``anyOf`` shapes (e.g.
-    polymorphic unions without null) are left alone.
-    """
-    if isinstance(schema, dict):
-        cleaned: dict[str, Any] = {}
-        for key, value in schema.items():
-            if key == "anyOf" and isinstance(value, list):
-                non_null = [
-                    _sanitise_schema_for_ollama(branch)
-                    for branch in value
-                    if not (isinstance(branch, dict) and branch.get("type") == "null")
-                ]
-                if len(non_null) == 1:
-                    # Inline the single remaining branch; merge its keys into the
-                    # parent so siblings like ``default``/``title`` are preserved.
-                    only = non_null[0]
-                    if isinstance(only, dict):
-                        for ok, ov in only.items():
-                            cleaned.setdefault(ok, ov)
-                    else:
-                        cleaned[key] = non_null
-                elif len(non_null) == 0:
-                    # Pathological: nothing left. Fall back to a permissive type.
-                    cleaned["type"] = "string"
-                else:
-                    cleaned[key] = non_null
-            else:
-                cleaned[key] = _sanitise_schema_for_ollama(value)
-        return cleaned
-    if isinstance(schema, list):
-        return [_sanitise_schema_for_ollama(item) for item in schema]
-    return schema
-
-
-__all__ = ["OllamaClient"]
--- a/src/ix/ocr/__init__.py
+++ b/src/ix/ocr/__init__.py
@@ -1,34 +1,13 @@
-"""OCR subsystem: protocol + fake + real Surya client + factory.
+"""OCR subsystem: protocol + fake client.

-Real engines (Surya today, Azure DI / AWS Textract … deferred) plug in
-behind :class:`OCRClient`. The factory :func:`make_ocr_client` picks
-between :class:`FakeOCRClient` (when ``IX_TEST_MODE=fake``) and
-:class:`SuryaOCRClient` (production). Unknown engine names raise so a
-typo'd ``IX_OCR_ENGINE`` surfaces at startup, not later.
+Real engines (Surya, Azure DI, …) plug in behind :class:`OCRClient`. The
+MVP ships only :class:`FakeOCRClient` from this package; the real Surya
+client lands in Chunk 4.
 """

 from __future__ import annotations

-from ix.config import AppConfig
-from ix.contracts.response import OCRDetails, OCRResult
 from ix.ocr.client import OCRClient
 from ix.ocr.fake import FakeOCRClient
-from ix.ocr.surya_client import SuryaOCRClient

-
-def make_ocr_client(cfg: AppConfig) -> OCRClient:
-    """Return the :class:`OCRClient` configured for the current run."""
-
-    if cfg.test_mode == "fake":
-        return FakeOCRClient(canned=OCRResult(result=OCRDetails()))
-    if cfg.ocr_engine == "surya":
-        return SuryaOCRClient()
-    raise ValueError(f"Unknown ocr_engine: {cfg.ocr_engine!r}")
-
-
-__all__ = [
-    "FakeOCRClient",
-    "OCRClient",
-    "SuryaOCRClient",
-    "make_ocr_client",
-]
+__all__ = ["FakeOCRClient", "OCRClient"]
--- a/src/ix/ocr/client.py
+++ b/src/ix/ocr/client.py
@@ -5,19 +5,11 @@ method satisfies the Protocol. :class:`~ix.pipeline.ocr_step.OCRStep`
 depends on the Protocol, not a concrete class, so swapping engines
 (``FakeOCRClient`` in tests, ``SuryaOCRClient`` in prod) stays a wiring
 change at the app factory.
-
-Per-page source location (``files`` + ``page_metadata``) flows in as
-optional kwargs: fakes ignore them; the real
-:class:`~ix.ocr.surya_client.SuryaOCRClient` uses them to render each
-page's pixels back from disk. Keeping these optional lets unit tests stay
-pages-only while production wiring (Task 4.3) plumbs through the real
-filesystem handles.
 """

 from __future__ import annotations

-from pathlib import Path
-from typing import Any, Protocol, runtime_checkable
+from typing import Protocol, runtime_checkable

 from ix.contracts import OCRResult, Page

@@ -32,18 +24,8 @@ class OCRClient(Protocol):
    per input page (in the same order).
    """

-    async def ocr(
-        self,
-        pages: list[Page],
-        *,
-        files: list[tuple[Path, str]] | None = None,
-        page_metadata: list[Any] | None = None,
-    ) -> OCRResult:
-        """Run OCR over the input pages; return the structured result.
-
-        ``files`` and ``page_metadata`` are optional for hermetic tests;
-        real engines that need to re-render from disk read them.
-        """
+    async def ocr(self, pages: list[Page]) -> OCRResult:
+        """Run OCR over the input pages; return the structured result."""
        ...


--- a/src/ix/ocr/fake.py
+++ b/src/ix/ocr/fake.py
@@ -30,17 +30,8 @@ class FakeOCRClient:
        self._canned = canned
        self._raise_on_call = raise_on_call

-    async def ocr(
-        self,
-        pages: list[Page],
-        **_kwargs: object,
-    ) -> OCRResult:
-        """Return the canned result or raise the configured error.
-
-        Accepts (and ignores) any keyword args the production Protocol may
-        carry — keeps the fake swappable for :class:`SuryaOCRClient` at
-        call sites that pass ``files`` / ``page_metadata``.
-        """
+    async def ocr(self, pages: list[Page]) -> OCRResult:
+        """Return the canned result or raise the configured error."""
        if self._raise_on_call is not None:
            raise self._raise_on_call
        return self._canned
--- a/src/ix/ocr/surya_client.py
+++ b/src/ix/ocr/surya_client.py
@ -1,235 +0,0 @@
-"""SuryaOCRClient — real :class:`OCRClient` backed by ``surya-ocr``.
-
-Per spec §6.2: the MVP OCR engine. Runs Surya's detection + recognition
-predictors over per-page PIL images rendered from the downloaded sources
-(PDFs via PyMuPDF, images via Pillow).
-
-Design choices:
-
-* **Lazy model loading.** ``__init__`` is cheap; the heavy predictors are
-  built on first :meth:`ocr` / :meth:`selfcheck` / explicit :meth:`warm_up`.
-  This keeps FastAPI's lifespan predictable — ops can decide whether to
-  pay the load cost up front or on first request.
-* **Device is Surya's default.** CUDA on the prod box, MPS on M-series Macs.
-  We deliberately don't pin.
-* **No text-token reuse from PyMuPDF.** The cross-check against Paperless'
-  Tesseract output (ReliabilityStep's ``text_agreement``) is only meaningful
-  with a truly independent OCR pass, so we always render-and-recognize
-  even for PDFs that carry embedded text.
-
-The ``surya-ocr`` package pulls torch + heavy model deps, so it's kept
-behind the ``[ocr]`` extra. All Surya imports are deferred into
-:meth:`warm_up` so running the unit tests (which patch the predictors)
-doesn't require the package to be installed.
-"""
-
-from __future__ import annotations
-
-import asyncio
-import contextlib
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal
-
-from ix.contracts import Line, OCRDetails, OCRResult, Page
-from ix.segmentation import PageMetadata
-
-if TYPE_CHECKING:  # pragma: no cover
-    from PIL import Image as PILImage
-
-
-class SuryaOCRClient:
-    """Surya-backed OCR engine.
-
-    Attributes are created lazily by :meth:`warm_up`. The unit tests inject
-    mocks directly onto ``_recognition_predictor`` / ``_detection_predictor``
-    to avoid the Surya import chain.
-    """
-
-    def __init__(self) -> None:
-        self._recognition_predictor: Any = None
-        self._detection_predictor: Any = None
-
-    def warm_up(self) -> None:
-        """Load the detection + recognition predictors. Idempotent.
-
-        Called automatically on the first :meth:`ocr` / :meth:`selfcheck`,
-        or explicitly from the app lifespan to front-load the cost.
-        """
-        if (
-            self._recognition_predictor is not None
-            and self._detection_predictor is not None
-        ):
-            return
-
-        # Deferred imports: only reachable when the optional [ocr] extra is
-        # installed. Keeping them inside the method so base-install unit
-        # tests (which patch the predictors) don't need surya on sys.path.
-        from surya.detection import DetectionPredictor  # type: ignore[import-not-found]
-        from surya.foundation import FoundationPredictor  # type: ignore[import-not-found]
-        from surya.recognition import RecognitionPredictor  # type: ignore[import-not-found]
-
-        foundation = FoundationPredictor()
-        self._recognition_predictor = RecognitionPredictor(foundation)
-        self._detection_predictor = DetectionPredictor()
-
-    async def ocr(
-        self,
-        pages: list[Page],
-        *,
-        files: list[tuple[Path, str]] | None = None,
-        page_metadata: list[Any] | None = None,
-    ) -> OCRResult:
-        """Render each input page, run Surya, translate back to contracts."""
-        self.warm_up()
-
-        images = self._render_pages(pages, files, page_metadata)
-
-        # Surya is blocking — run it off the event loop.
-        loop = asyncio.get_running_loop()
-        surya_results = await loop.run_in_executor(
-            None, self._run_recognition, images
-        )
-
-        out_pages: list[Page] = []
-        all_text_fragments: list[str] = []
-        for input_page, surya_result in zip(pages, surya_results, strict=True):
-            lines: list[Line] = []
-            for tl in getattr(surya_result, "text_lines", []) or []:
-                flat = self._flatten_polygon(getattr(tl, "polygon", None))
-                text = getattr(tl, "text", None)
-                lines.append(Line(text=text, bounding_box=flat))
-                if text:
-                    all_text_fragments.append(text)
-            out_pages.append(
-                Page(
-                    page_no=input_page.page_no,
-                    width=input_page.width,
-                    height=input_page.height,
-                    angle=input_page.angle,
-                    unit=input_page.unit,
-                    lines=lines,
-                )
-            )
-
-        details = OCRDetails(
-            text="\n".join(all_text_fragments) if all_text_fragments else None,
-            pages=out_pages,
-        )
-        return OCRResult(result=details, meta_data={"engine": "surya"})
-
-    async def selfcheck(self) -> Literal["ok", "fail"]:
-        """Run the predictors on a 1x1 image to confirm the stack works."""
-        try:
-            self.warm_up()
-        except Exception:
-            return "fail"
-
-        try:
-            from PIL import Image as PILImageRuntime
-
-            img = PILImageRuntime.new("RGB", (1, 1), color="white")
-            loop = asyncio.get_running_loop()
-            await loop.run_in_executor(None, self._run_recognition, [img])
-        except Exception:
-            return "fail"
-        return "ok"
-
-    def _run_recognition(self, images: list[PILImage.Image]) -> list[Any]:
-        """Invoke the recognition predictor. Kept tiny for threadpool offload."""
-        return list(
-            self._recognition_predictor(
-                images, det_predictor=self._detection_predictor
-            )
-        )
-
-    def _render_pages(
-        self,
-        pages: list[Page],
-        files: list[tuple[Path, str]] | None,
-        page_metadata: list[Any] | None,
-    ) -> list[PILImage.Image]:
-        """Render each input :class:`Page` to a PIL image.
-
-        We walk pages + page_metadata in lockstep so we know which source
-        file each page came from and (for PDFs) what page-index to render.
-        Text-only pages (``file_index is None``) get a blank 1x1 placeholder
-        so Surya returns an empty result and downstream code still gets one
-        entry per input page.
-        """
-        from PIL import Image as PILImageRuntime
-
-        metas: list[PageMetadata] = list(page_metadata or [])
-        file_records: list[tuple[Path, str]] = list(files or [])
-
-        # Per-file lazy PDF openers so we don't re-open across pages.
-        pdf_docs: dict[int, Any] = {}
-
-        # Per-file running page-within-file counter. For PDFs we emit one
-        # entry per PDF page in order; ``pages`` was built the same way by
-        # DocumentIngestor, so a parallel counter reconstructs the mapping.
-        per_file_cursor: dict[int, int] = {}
-
-        rendered: list[PILImage.Image] = []
-        try:
-            for idx, _page in enumerate(pages):
-                meta = metas[idx] if idx < len(metas) else PageMetadata()
-                file_index = meta.file_index
-                if file_index is None or file_index >= len(file_records):
-                    # Text-only page — placeholder image; Surya returns empty.
-                    rendered.append(
-                        PILImageRuntime.new("RGB", (1, 1), color="white")
-                    )
-                    continue
-
-                local_path, mime = file_records[file_index]
-                if mime == "application/pdf":
-                    doc = pdf_docs.get(file_index)
-                    if doc is None:
-                        import fitz  # PyMuPDF
-
-                        doc = fitz.open(str(local_path))
-                        pdf_docs[file_index] = doc
-                    pdf_page_no = per_file_cursor.get(file_index, 0)
-                    per_file_cursor[file_index] = pdf_page_no + 1
-                    pdf_page = doc.load_page(pdf_page_no)
-                    pix = pdf_page.get_pixmap(dpi=200)
-                    img = PILImageRuntime.frombytes(
-                        "RGB", (pix.width, pix.height), pix.samples
-                    )
-                    rendered.append(img)
-                elif mime in ("image/png", "image/jpeg", "image/tiff"):
-                    frame_no = per_file_cursor.get(file_index, 0)
-                    per_file_cursor[file_index] = frame_no + 1
-                    img = PILImageRuntime.open(local_path)
-                    # Handle multi-frame (TIFF) — seek to the right frame.
-                    with contextlib.suppress(EOFError):
-                        img.seek(frame_no)
-                    rendered.append(img.convert("RGB"))
-                else:  # pragma: no cover - ingestor already rejected
-                    rendered.append(
-                        PILImageRuntime.new("RGB", (1, 1), color="white")
-                    )
-        finally:
-            for doc in pdf_docs.values():
-                with contextlib.suppress(Exception):
-                    doc.close()
-        return rendered
-
-    @staticmethod
-    def _flatten_polygon(polygon: Any) -> list[float]:
-        """Flatten ``[[x1,y1],[x2,y2],[x3,y3],[x4,y4]]`` → 8-float list.
-
-        Surya emits 4 quad corners. The spec wants 8 raw-pixel coords so
-        downstream provenance normalisation can consume them directly.
-        """
-        if not polygon:
-            return []
-        flat: list[float] = []
-        for point in polygon:
-            if isinstance(point, (list, tuple)) and len(point) >= 2:
-                flat.append(float(point[0]))
-                flat.append(float(point[1]))
-        return flat
-
-
-__all__ = ["SuryaOCRClient"]
--- a/src/ix/pipeline/ocr_step.py
+++ b/src/ix/pipeline/ocr_step.py
@ -56,11 +56,7 @@ class OCRStep(Step):
        assert ctx is not None, "SetupStep must populate response_ix.context"

        pages = list(getattr(ctx, "pages", []))
-        files = list(getattr(ctx, "files", []) or [])
-        page_metadata = list(getattr(ctx, "page_metadata", []) or [])
-        ocr_result = await self._client.ocr(
-            pages, files=files, page_metadata=page_metadata
-        )
+        ocr_result = await self._client.ocr(pages)

        # Inject page tags around each OCR page's content so the LLM can
        # cross-reference the visual anchor without a separate prompt hack.
--- a/src/ix/use_cases/bank_statement_header.py
+++ b/src/ix/use_cases/bank_statement_header.py
@ -26,7 +26,7 @@ class Request(BaseModel):
    model_config = ConfigDict(extra="forbid")

    use_case_name: str = "Bank Statement Header"
-    default_model: str = "qwen3:14b"
+    default_model: str = "gpt-oss:20b"
    system_prompt: str = (
        "You extract header metadata from a single bank or credit-card statement. "
        "Return only facts that appear in the document; leave a field null if uncertain. "
--- a/src/ix/worker/loop.py
+++ b/src/ix/worker/loop.py
@ -28,6 +28,8 @@ from collections.abc import Callable
 from datetime import UTC, datetime
 from typing import TYPE_CHECKING

+from fastapi import FastAPI
+
 from ix.contracts.response import ResponseIX
 from ix.errors import IXErrorCode, IXException
 from ix.pipeline.pipeline import Pipeline
@ -177,3 +179,17 @@ class Worker:
            await session.commit()


+async def spawn_worker_task(app: FastAPI):  # type: ignore[no-untyped-def]
+    """Hook called from the FastAPI lifespan (Task 3.4).
+
+    This module-level async function is here so ``ix.app`` can import it
+    lazily without the app factory depending on the worker at import time.
+    Production wiring (Chunk 4) constructs a real Pipeline; for now this
+    is a no-op that returns ``None`` so the import chain completes. Tests
+    that need the worker wire their own Worker explicitly.
+    """
+
+    # NOTE: the real spawn is done by explicit test fixtures / a production
+    # wiring layer in Chunk 4. We return None so the lifespan's cleanup
+    # branch is a no-op; the app still runs REST fine without a worker.
+    return None
--- a/tests/live/__init__.py
+++ b/tests/live/__init__.py
--- a/tests/live/test_ollama_client_live.py
+++ b/tests/live/test_ollama_client_live.py
@ -1,70 +0,0 @@
-"""Live tests for :class:`OllamaClient` — gated on ``IX_TEST_OLLAMA=1``.
-
-Never runs in CI (Forgejo runner has no LAN access to Ollama). Run locally::
-
-    IX_TEST_OLLAMA=1 uv run pytest tests/live/test_ollama_client_live.py -v
-
-Assumes the Ollama server at ``http://192.168.68.42:11434`` already has
-``qwen3:14b`` pulled.
-"""
-
-from __future__ import annotations
-
-import os
-
-import pytest
-
-from ix.genai.ollama_client import OllamaClient
-from ix.use_cases.bank_statement_header import BankStatementHeader
-
-pytestmark = [
-    pytest.mark.live,
-    pytest.mark.skipif(
-        os.environ.get("IX_TEST_OLLAMA") != "1",
-        reason="live: IX_TEST_OLLAMA=1 required",
-    ),
-]
-
-_OLLAMA_URL = "http://192.168.68.42:11434"
-_MODEL = "qwen3:14b"
-
-
-async def test_structured_output_round_trip() -> None:
-    """Real Ollama returns a parsed BankStatementHeader instance."""
-    client = OllamaClient(base_url=_OLLAMA_URL, per_call_timeout_s=300.0)
-    result = await client.invoke(
-        request_kwargs={
-            "model": _MODEL,
-            "messages": [
-                {
-                    "role": "system",
-                    "content": (
-                        "You extract bank statement header fields. "
-                        "Return valid JSON matching the given schema. "
-                        "Do not invent values."
-                    ),
-                },
-                {
-                    "role": "user",
-                    "content": (
-                        "Bank: Deutsche Kreditbank (DKB)\n"
-                        "Currency: EUR\n"
-                        "IBAN: DE89370400440532013000\n"
-                        "Period: 2025-01-01 to 2025-01-31"
-                    ),
-                },
-            ],
-        },
-        response_schema=BankStatementHeader,
-    )
-    assert isinstance(result.parsed, BankStatementHeader)
-    assert isinstance(result.parsed.bank_name, str)
-    assert result.parsed.bank_name  # non-empty
-    assert isinstance(result.parsed.currency, str)
-    assert result.model_name  # server echoes a model name
-
-
-async def test_selfcheck_ok_against_real_server() -> None:
-    """``selfcheck`` returns ``ok`` when the target model is pulled."""
-    client = OllamaClient(base_url=_OLLAMA_URL, per_call_timeout_s=5.0)
-    assert await client.selfcheck(expected_model=_MODEL) == "ok"
--- a/tests/live/test_surya_client_live.py
+++ b/tests/live/test_surya_client_live.py
@ -1,83 +0,0 @@
-"""Live test for :class:`SuryaOCRClient` — gated on ``IX_TEST_OLLAMA=1``.
-
-Downloads real Surya models (hundreds of MB) on first run. Never runs in
-CI. Exercised locally with::
-
-    IX_TEST_OLLAMA=1 uv run pytest tests/live/test_surya_client_live.py -v
-
-Note: requires the ``[ocr]`` extra — ``uv sync --extra ocr --extra dev``.
-"""
-
-from __future__ import annotations
-
-import os
-from pathlib import Path
-
-import pytest
-
-from ix.contracts import Page
-from ix.segmentation import PageMetadata
-
-pytestmark = [
-    pytest.mark.live,
-    pytest.mark.skipif(
-        os.environ.get("IX_TEST_OLLAMA") != "1",
-        reason="live: IX_TEST_OLLAMA=1 required",
-    ),
-]
-
-
-async def test_extracts_dkb_and_iban_from_synthetic_giro() -> None:
-    """Real Surya run against ``tests/fixtures/synthetic_giro.pdf``.
-
-    Assert the flat text contains ``"DKB"`` and the IBAN without spaces.
-    """
-    from ix.ocr.surya_client import SuryaOCRClient
-
-    fixture = Path(__file__).parent.parent / "fixtures" / "synthetic_giro.pdf"
-    assert fixture.exists(), f"missing fixture: {fixture}"
-
-    # Build Pages the way DocumentIngestor would for this PDF: count pages
-    # via PyMuPDF so we pass the right number of inputs.
-    import fitz
-
-    doc = fitz.open(str(fixture))
-    try:
-        pages = [
-            Page(
-                page_no=i + 1,
-                width=float(p.rect.width),
-                height=float(p.rect.height),
-                lines=[],
-            )
-            for i, p in enumerate(doc)
-        ]
-    finally:
-        doc.close()
-
-    client = SuryaOCRClient()
-    result = await client.ocr(
-        pages,
-        files=[(fixture, "application/pdf")],
-        page_metadata=[PageMetadata(file_index=0) for _ in pages],
-    )
-
-    flat_text = result.result.text or ""
-    # Join page-level line texts if flat text missing (shape-safety).
-    if not flat_text:
-        flat_text = "\n".join(
-            line.text or ""
-            for page in result.result.pages
-            for line in page.lines
-        )
-
-    assert "DKB" in flat_text
-    assert "DE89370400440532013000" in flat_text.replace(" ", "")
-
-
-async def test_selfcheck_ok_against_real_predictors() -> None:
-    """``selfcheck()`` returns ``ok`` once Surya's predictors load."""
-    from ix.ocr.surya_client import SuryaOCRClient
-
-    client = SuryaOCRClient()
-    assert await client.selfcheck() == "ok"
--- a/tests/unit/test_app_wiring.py
+++ b/tests/unit/test_app_wiring.py
@ -1,104 +0,0 @@
-"""Tests for ``ix.app`` lifespan / probe wiring (Task 4.3).
-
-The lifespan selects fake clients when ``IX_TEST_MODE=fake`` and exposes
-their probes via the route DI hook. These tests exercise the probe
-adapter in isolation — no DB, no real Ollama/Surya.
-"""
-
-from __future__ import annotations
-
-from typing import Literal
-
-from ix.app import _make_ocr_probe, _make_ollama_probe, build_pipeline
-from ix.config import AppConfig
-from ix.genai.fake import FakeGenAIClient
-from ix.ocr.fake import FakeOCRClient
-from ix.pipeline.genai_step import GenAIStep
-from ix.pipeline.ocr_step import OCRStep
-from ix.pipeline.pipeline import Pipeline
-from ix.pipeline.reliability_step import ReliabilityStep
-from ix.pipeline.response_handler_step import ResponseHandlerStep
-from ix.pipeline.setup_step import SetupStep
-
-
-def _cfg(**overrides: object) -> AppConfig:
-    return AppConfig(_env_file=None, **overrides)  # type: ignore[call-arg]
-
-
-class _SelfcheckOllamaClient:
-    async def invoke(self, *a: object, **kw: object) -> object:
-        raise NotImplementedError
-
-    async def selfcheck(
-        self, expected_model: str
-    ) -> Literal["ok", "degraded", "fail"]:
-        self.called_with = expected_model
-        return "ok"
-
-
-class _SelfcheckOCRClient:
-    async def ocr(self, *a: object, **kw: object) -> object:
-        raise NotImplementedError
-
-    async def selfcheck(self) -> Literal["ok", "fail"]:
-        return "ok"
-
-
-class _BrokenSelfcheckOllama:
-    async def invoke(self, *a: object, **kw: object) -> object:
-        raise NotImplementedError
-
-    async def selfcheck(
-        self, expected_model: str
-    ) -> Literal["ok", "degraded", "fail"]:
-        raise RuntimeError("boom")
-
-
-class TestOllamaProbe:
-    def test_fake_client_without_selfcheck_reports_ok(self) -> None:
-        cfg = _cfg(test_mode="fake", default_model="gpt-oss:20b")
-        probe = _make_ollama_probe(FakeGenAIClient(parsed=None), cfg)
-        assert probe() == "ok"
-
-    def test_real_selfcheck_returns_its_verdict(self) -> None:
-        cfg = _cfg(default_model="gpt-oss:20b")
-        client = _SelfcheckOllamaClient()
-        probe = _make_ollama_probe(client, cfg)  # type: ignore[arg-type]
-        assert probe() == "ok"
-        assert client.called_with == "gpt-oss:20b"
-
-    def test_selfcheck_exception_falls_back_to_fail(self) -> None:
-        cfg = _cfg(default_model="gpt-oss:20b")
-        probe = _make_ollama_probe(_BrokenSelfcheckOllama(), cfg)  # type: ignore[arg-type]
-        assert probe() == "fail"
-
-
-class TestOCRProbe:
-    def test_fake_client_without_selfcheck_reports_ok(self) -> None:
-        from ix.contracts.response import OCRDetails, OCRResult
-
-        probe = _make_ocr_probe(FakeOCRClient(canned=OCRResult(result=OCRDetails())))
-        assert probe() == "ok"
-
-    def test_real_selfcheck_returns_its_verdict(self) -> None:
-        probe = _make_ocr_probe(_SelfcheckOCRClient())  # type: ignore[arg-type]
-        assert probe() == "ok"
-
-
-class TestBuildPipeline:
-    def test_assembles_all_five_steps_in_order(self) -> None:
-        from ix.contracts.response import OCRDetails, OCRResult
-
-        genai = FakeGenAIClient(parsed=None)
-        ocr = FakeOCRClient(canned=OCRResult(result=OCRDetails()))
-        cfg = _cfg(test_mode="fake")
-        pipeline = build_pipeline(genai, ocr, cfg)
-        assert isinstance(pipeline, Pipeline)
-        steps = pipeline._steps  # type: ignore[attr-defined]
-        assert [type(s) for s in steps] == [
-            SetupStep,
-            OCRStep,
-            GenAIStep,
-            ReliabilityStep,
-            ResponseHandlerStep,
-        ]
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@ -51,10 +51,10 @@ def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None:

    assert cfg.postgres_url == (
        "postgresql+asyncpg://infoxtractor:<password>"
-        "@127.0.0.1:5431/infoxtractor"
+        "@host.docker.internal:5431/infoxtractor"
    )
-    assert cfg.ollama_url == "http://127.0.0.1:11434"
-    assert cfg.default_model == "qwen3:14b"
+    assert cfg.ollama_url == "http://host.docker.internal:11434"
+    assert cfg.default_model == "gpt-oss:20b"
    assert cfg.ocr_engine == "surya"
    assert cfg.tmp_dir == "/tmp/ix"
    assert cfg.pipeline_worker_concurrency == 1
--- a/tests/unit/test_factories.py
+++ b/tests/unit/test_factories.py
@ -1,60 +0,0 @@
-"""Tests for the GenAI + OCR factories (Task 4.3).
-
-The factories pick between fake and real clients based on
-``IX_TEST_MODE``. CI runs with ``IX_TEST_MODE=fake``, production runs
-without — so the selection knob is the one lever between hermetic CI and
-real clients.
-"""
-
-from __future__ import annotations
-
-from ix.config import AppConfig
-from ix.genai import make_genai_client
-from ix.genai.fake import FakeGenAIClient
-from ix.genai.ollama_client import OllamaClient
-from ix.ocr import make_ocr_client
-from ix.ocr.fake import FakeOCRClient
-from ix.ocr.surya_client import SuryaOCRClient
-
-
-def _cfg(**overrides: object) -> AppConfig:
-    """Build an AppConfig without loading the repo's .env.example."""
-    return AppConfig(_env_file=None, **overrides)  # type: ignore[call-arg]
-
-
-class TestGenAIFactory:
-    def test_fake_mode_returns_fake(self) -> None:
-        cfg = _cfg(test_mode="fake")
-        client = make_genai_client(cfg)
-        assert isinstance(client, FakeGenAIClient)
-
-    def test_production_returns_ollama_with_configured_url(self) -> None:
-        cfg = _cfg(
-            test_mode=None,
-            ollama_url="http://ollama.host:11434",
-            genai_call_timeout_seconds=42,
-        )
-        client = make_genai_client(cfg)
-        assert isinstance(client, OllamaClient)
-        # Inspect the private attrs for binding correctness.
-        assert client._base_url == "http://ollama.host:11434"
-        assert client._per_call_timeout_s == 42
-
-
-class TestOCRFactory:
-    def test_fake_mode_returns_fake(self) -> None:
-        cfg = _cfg(test_mode="fake")
-        client = make_ocr_client(cfg)
-        assert isinstance(client, FakeOCRClient)
-
-    def test_production_surya_returns_surya(self) -> None:
-        cfg = _cfg(test_mode=None, ocr_engine="surya")
-        client = make_ocr_client(cfg)
-        assert isinstance(client, SuryaOCRClient)
-
-    def test_unknown_engine_raises(self) -> None:
-        cfg = _cfg(test_mode=None, ocr_engine="tesseract")
-        import pytest
-
-        with pytest.raises(ValueError, match="ocr_engine"):
-            make_ocr_client(cfg)
--- a/tests/unit/test_genai_step.py
+++ b/tests/unit/test_genai_step.py
@ -363,8 +363,8 @@ class TestModelSelection:
        req = _make_request(include_provenance=False)
        resp = _response_with_segment_index(lines=["hello"])
        await step.process(req, resp)
-        # use-case default is qwen3:14b
-        assert client.request_kwargs["model"] == "qwen3:14b"  # type: ignore[index]
+        # use-case default is gpt-oss:20b
+        assert client.request_kwargs["model"] == "gpt-oss:20b"  # type: ignore[index]


 # ----------------------------------------------------------------------------
--- a/tests/unit/test_ollama_client.py
+++ b/tests/unit/test_ollama_client.py
@ -1,270 +0,0 @@
-"""Tests for :class:`OllamaClient` — hermetic, pytest-httpx-driven.
-
-Covers spec §6 GenAIStep Ollama call contract:
-
-* POST body shape (model / messages / format / stream / options).
-* Response parsing → :class:`GenAIInvocationResult`.
-* Error mapping: connection / timeout / 5xx → ``IX_002_000``;
-  schema-violating body → ``IX_002_001``.
-* ``selfcheck()``: tags-reachable + model-listed → ``ok``;
-  reachable-but-missing → ``degraded``; unreachable → ``fail``.
-"""
-
-from __future__ import annotations
-
-import httpx
-import pytest
-from pydantic import BaseModel
-from pytest_httpx import HTTPXMock
-
-from ix.errors import IXErrorCode, IXException
-from ix.genai.ollama_client import OllamaClient
-
-
-class _Schema(BaseModel):
-    """Trivial structured-output schema for the round-trip tests."""
-
-    bank_name: str
-    account_number: str | None = None
-
-
-def _ollama_chat_ok_body(content_json: str) -> dict:
-    """Build a minimal Ollama /api/chat success body."""
-    return {
-        "model": "gpt-oss:20b",
-        "message": {"role": "assistant", "content": content_json},
-        "done": True,
-        "eval_count": 42,
-        "prompt_eval_count": 17,
-    }
-
-
-class TestInvokeHappyPath:
-    async def test_posts_to_chat_endpoint_with_format_and_no_stream(
-        self, httpx_mock: HTTPXMock
-    ) -> None:
-        httpx_mock.add_response(
-            url="http://ollama.test:11434/api/chat",
-            method="POST",
-            json=_ollama_chat_ok_body('{"bank_name":"DKB","account_number":"DE89"}'),
-        )
-
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
-        )
-        result = await client.invoke(
-            request_kwargs={
-                "model": "gpt-oss:20b",
-                "messages": [
-                    {"role": "system", "content": "You extract."},
-                    {"role": "user", "content": "Doc body"},
-                ],
-                "temperature": 0.2,
-                "reasoning_effort": "high",  # dropped silently
-            },
-            response_schema=_Schema,
-        )
-
-        assert result.parsed == _Schema(bank_name="DKB", account_number="DE89")
-        assert result.model_name == "gpt-oss:20b"
-        assert result.usage.prompt_tokens == 17
-        assert result.usage.completion_tokens == 42
-
-        # Verify request shape.
-        requests = httpx_mock.get_requests()
-        assert len(requests) == 1
-        body = requests[0].read().decode()
-        import json
-
-        body_json = json.loads(body)
-        assert body_json["model"] == "gpt-oss:20b"
-        assert body_json["stream"] is False
-        # No `format` is sent: Ollama 0.11.8 segfaults on full schemas and
-        # aborts to `{}` with `format=json` on reasoning models. Schema is
-        # injected into the system prompt instead; we extract the trailing
-        # JSON blob from the response and validate via Pydantic.
-        assert "format" not in body_json
-        assert body_json["options"]["temperature"] == 0.2
-        assert "reasoning_effort" not in body_json
-        # A schema-guidance system message is prepended to the caller's
-        # messages so Ollama (format=json loose mode) emits the right shape.
-        msgs = body_json["messages"]
-        assert msgs[0]["role"] == "system"
-        assert "JSON Schema" in msgs[0]["content"]
-        assert msgs[1:] == [
-            {"role": "system", "content": "You extract."},
-            {"role": "user", "content": "Doc body"},
-        ]
-
-    async def test_text_parts_content_list_is_joined(
-        self, httpx_mock: HTTPXMock
-    ) -> None:
-        httpx_mock.add_response(
-            url="http://ollama.test:11434/api/chat",
-            method="POST",
-            json=_ollama_chat_ok_body('{"bank_name":"X"}'),
-        )
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
-        )
-        await client.invoke(
-            request_kwargs={
-                "model": "gpt-oss:20b",
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": [
-                            {"type": "text", "text": "part-a"},
-                            {"type": "text", "text": "part-b"},
-                        ],
-                    }
-                ],
-            },
-            response_schema=_Schema,
-        )
-        import json
-
-        request_body = json.loads(httpx_mock.get_requests()[0].read())
-        # First message is the auto-injected schema guidance; after that
-        # the caller's user message has its text parts joined.
-        assert request_body["messages"][0]["role"] == "system"
-        assert request_body["messages"][1:] == [
-            {"role": "user", "content": "part-a\npart-b"}
-        ]
-
-
-class TestInvokeErrorPaths:
-    async def test_connection_error_maps_to_002_000(
-        self, httpx_mock: HTTPXMock
-    ) -> None:
-        httpx_mock.add_exception(httpx.ConnectError("refused"))
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=1.0
-        )
-        with pytest.raises(IXException) as ei:
-            await client.invoke(
-                request_kwargs={
-                    "model": "gpt-oss:20b",
-                    "messages": [{"role": "user", "content": "hi"}],
-                },
-                response_schema=_Schema,
-            )
-        assert ei.value.code is IXErrorCode.IX_002_000
-
-    async def test_read_timeout_maps_to_002_000(self, httpx_mock: HTTPXMock) -> None:
-        httpx_mock.add_exception(httpx.ReadTimeout("slow"))
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=0.5
-        )
-        with pytest.raises(IXException) as ei:
-            await client.invoke(
-                request_kwargs={
-                    "model": "gpt-oss:20b",
-                    "messages": [{"role": "user", "content": "hi"}],
-                },
-                response_schema=_Schema,
-            )
-        assert ei.value.code is IXErrorCode.IX_002_000
-
-    async def test_500_maps_to_002_000_with_body_snippet(
-        self, httpx_mock: HTTPXMock
-    ) -> None:
-        httpx_mock.add_response(
-            url="http://ollama.test:11434/api/chat",
-            method="POST",
-            status_code=500,
-            text="boom boom server broken",
-        )
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
-        )
-        with pytest.raises(IXException) as ei:
-            await client.invoke(
-                request_kwargs={
-                    "model": "gpt-oss:20b",
-                    "messages": [{"role": "user", "content": "hi"}],
-                },
-                response_schema=_Schema,
-            )
-        assert ei.value.code is IXErrorCode.IX_002_000
-        assert "boom" in (ei.value.detail or "")
-
-    async def test_200_with_invalid_json_maps_to_002_001(
-        self, httpx_mock: HTTPXMock
-    ) -> None:
-        httpx_mock.add_response(
-            url="http://ollama.test:11434/api/chat",
-            method="POST",
-            json=_ollama_chat_ok_body("not-json"),
-        )
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
-        )
-        with pytest.raises(IXException) as ei:
-            await client.invoke(
-                request_kwargs={
-                    "model": "gpt-oss:20b",
-                    "messages": [{"role": "user", "content": "hi"}],
-                },
-                response_schema=_Schema,
-            )
-        assert ei.value.code is IXErrorCode.IX_002_001
-
-    async def test_200_with_schema_violation_maps_to_002_001(
-        self, httpx_mock: HTTPXMock
-    ) -> None:
-        # Missing required `bank_name` field.
-        httpx_mock.add_response(
-            url="http://ollama.test:11434/api/chat",
-            method="POST",
-            json=_ollama_chat_ok_body('{"account_number":"DE89"}'),
-        )
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
-        )
-        with pytest.raises(IXException) as ei:
-            await client.invoke(
-                request_kwargs={
-                    "model": "gpt-oss:20b",
-                    "messages": [{"role": "user", "content": "hi"}],
-                },
-                response_schema=_Schema,
-            )
-        assert ei.value.code is IXErrorCode.IX_002_001
-
-
-class TestSelfcheck:
-    async def test_selfcheck_ok_when_model_listed(
-        self, httpx_mock: HTTPXMock
-    ) -> None:
-        httpx_mock.add_response(
-            url="http://ollama.test:11434/api/tags",
-            method="GET",
-            json={"models": [{"name": "gpt-oss:20b"}, {"name": "qwen2.5:32b"}]},
-        )
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
-        )
-        assert await client.selfcheck(expected_model="gpt-oss:20b") == "ok"
-
-    async def test_selfcheck_degraded_when_model_missing(
-        self, httpx_mock: HTTPXMock
-    ) -> None:
-        httpx_mock.add_response(
-            url="http://ollama.test:11434/api/tags",
-            method="GET",
-            json={"models": [{"name": "qwen2.5:32b"}]},
-        )
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
-        )
-        assert await client.selfcheck(expected_model="gpt-oss:20b") == "degraded"
-
-    async def test_selfcheck_fail_on_connection_error(
-        self, httpx_mock: HTTPXMock
-    ) -> None:
-        httpx_mock.add_exception(httpx.ConnectError("refused"))
-        client = OllamaClient(
-            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
-        )
-        assert await client.selfcheck(expected_model="gpt-oss:20b") == "fail"
--- a/tests/unit/test_surya_client.py
+++ b/tests/unit/test_surya_client.py
@ -1,166 +0,0 @@
-"""Tests for :class:`SuryaOCRClient` — hermetic, no model download.
-
-The real Surya predictors are patched out with :class:`unittest.mock.MagicMock`
-that return trivially-shaped line objects. The tests assert the client's
-translation layer — flattening polygons, mapping text_lines → ``Line``,
-preserving ``page_no``/``width``/``height`` per input page.
-"""
-
-from __future__ import annotations
-
-from pathlib import Path
-from types import SimpleNamespace
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-from ix.contracts import Page
-from ix.ocr.surya_client import SuryaOCRClient
-from ix.segmentation import PageMetadata
-
-
-def _make_surya_line(text: str, polygon: list[list[float]]) -> SimpleNamespace:
-    """Mimic ``surya.recognition.schema.TextLine`` duck-typing-style."""
-    return SimpleNamespace(text=text, polygon=polygon, confidence=0.95)
-
-
-def _make_surya_ocr_result(lines: list[SimpleNamespace]) -> SimpleNamespace:
-    """Mimic ``surya.recognition.schema.OCRResult``."""
-    return SimpleNamespace(text_lines=lines, image_bbox=[0, 0, 100, 100])
-
-
-class TestOCRBuildsOCRResultFromMockedPredictors:
-    async def test_one_image_one_line_flatten_polygon(self, tmp_path: Path) -> None:
-        img_path = tmp_path / "a.png"
-        _write_tiny_png(img_path)
-
-        mock_line = _make_surya_line(
-            text="hello",
-            polygon=[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]],
-        )
-        mock_predictor = MagicMock(
-            return_value=[_make_surya_ocr_result([mock_line])]
-        )
-
-        client = SuryaOCRClient()
-        # Skip the real warm_up; inject the mock directly.
-        client._recognition_predictor = mock_predictor
-        client._detection_predictor = MagicMock()
-
-        pages = [Page(page_no=1, width=100.0, height=50.0, lines=[])]
-        result = await client.ocr(
-            pages,
-            files=[(img_path, "image/png")],
-            page_metadata=[PageMetadata(file_index=0)],
-        )
-
-        assert len(result.result.pages) == 1
-        out_page = result.result.pages[0]
-        assert out_page.page_no == 1
-        assert out_page.width == 100.0
-        assert out_page.height == 50.0
-        assert len(out_page.lines) == 1
-        assert out_page.lines[0].text == "hello"
-        assert out_page.lines[0].bounding_box == [
-            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
-        ]
-
-    async def test_multiple_pages_preserves_order(self, tmp_path: Path) -> None:
-        img_a = tmp_path / "a.png"
-        img_b = tmp_path / "b.png"
-        _write_tiny_png(img_a)
-        _write_tiny_png(img_b)
-
-        mock_predictor = MagicMock(
-            return_value=[
-                _make_surya_ocr_result(
-                    [_make_surya_line("a-line", [[0, 0], [1, 0], [1, 1], [0, 1]])]
-                ),
-                _make_surya_ocr_result(
-                    [_make_surya_line("b-line", [[0, 0], [1, 0], [1, 1], [0, 1]])]
-                ),
-            ]
-        )
-
-        client = SuryaOCRClient()
-        client._recognition_predictor = mock_predictor
-        client._detection_predictor = MagicMock()
-
-        pages = [
-            Page(page_no=1, width=10.0, height=20.0, lines=[]),
-            Page(page_no=2, width=10.0, height=20.0, lines=[]),
-        ]
-        result = await client.ocr(
-            pages,
-            files=[(img_a, "image/png"), (img_b, "image/png")],
-            page_metadata=[
-                PageMetadata(file_index=0),
-                PageMetadata(file_index=1),
-            ],
-        )
-
-        assert [p.lines[0].text for p in result.result.pages] == ["a-line", "b-line"]
-
-    async def test_lazy_warm_up_on_first_ocr(self, tmp_path: Path) -> None:
-        img = tmp_path / "x.png"
-        _write_tiny_png(img)
-
-        client = SuryaOCRClient()
-
-        # Use patch.object on the instance's warm_up so we don't need real
-        # Surya module loading.
-        with patch.object(client, "warm_up", autospec=True) as mocked_warm_up:
-            # After warm_up is called, the predictors must be assigned.
-            def fake_warm_up(self: SuryaOCRClient) -> None:
-                self._recognition_predictor = MagicMock(
-                    return_value=[
-                        _make_surya_ocr_result(
-                            [
-                                _make_surya_line(
-                                    "hi", [[0, 0], [1, 0], [1, 1], [0, 1]]
-                                )
-                            ]
-                        )
-                    ]
-                )
-                self._detection_predictor = MagicMock()
-
-            mocked_warm_up.side_effect = lambda: fake_warm_up(client)
-
-            pages = [Page(page_no=1, width=10.0, height=10.0, lines=[])]
-            await client.ocr(
-                pages,
-                files=[(img, "image/png")],
-                page_metadata=[PageMetadata(file_index=0)],
-            )
-            mocked_warm_up.assert_called_once()
-
-
-class TestSelfcheck:
-    async def test_selfcheck_ok_with_mocked_predictors(self) -> None:
-        client = SuryaOCRClient()
-        client._recognition_predictor = MagicMock(
-            return_value=[_make_surya_ocr_result([])]
-        )
-        client._detection_predictor = MagicMock()
-        assert await client.selfcheck() == "ok"
-
-    async def test_selfcheck_fail_when_predictor_raises(self) -> None:
-        client = SuryaOCRClient()
-        client._recognition_predictor = MagicMock(
-            side_effect=RuntimeError("cuda broken")
-        )
-        client._detection_predictor = MagicMock()
-        assert await client.selfcheck() == "fail"
-
-
-def _write_tiny_png(path: Path) -> None:
-    """Write a 2x2 white PNG so PIL can open it."""
-    from PIL import Image
-
-    Image.new("RGB", (2, 2), color="white").save(path, format="PNG")
-
-
-@pytest.mark.parametrize("unused", [None])  # keep pytest happy if file ever runs alone
-def test_module_imports(unused: None) -> None:
-    assert SuryaOCRClient is not None
--- a/tests/unit/test_use_case_bank_statement_header.py
+++ b/tests/unit/test_use_case_bank_statement_header.py
@ -12,7 +12,7 @@ class TestRequest:
    def test_defaults(self) -> None:
        r = Request()
        assert r.use_case_name == "Bank Statement Header"
-        assert r.default_model == "qwen3:14b"
+        assert r.default_model == "gpt-oss:20b"
        # Stable substring for agent/worker tests that want to confirm the
        # prompt is what they think it is.
        assert "extract header metadata" in r.system_prompt
--- a/uv.lock
+++ b/uv.lock
@ -110,79 +110,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/db/3c/33bac158f8ab7f89b2e59426d5fe2e4f63f7ed25df84c036890172b412b5/cfgv-3.5.0-py2.py3-none-any.whl", hash = "sha256:a8dc6b26ad22ff227d2634a65cb388215ce6cc96bbcc5cfde7641ae87e8dacc0", size = 7445, upload-time = "2025-11-19T20:55:50.744Z" },
 ]

-[[package]]
-name = "charset-normalizer"
-version = "3.4.7"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/e7/a1/67fe25fac3c7642725500a3f6cfe5821ad557c3abb11c9d20d12c7008d3e/charset_normalizer-3.4.7.tar.gz", hash = "sha256:ae89db9e5f98a11a4bf50407d4363e7b09b31e55bc117b4f7d80aab97ba009e5", size = 144271, upload-time = "2026-04-02T09:28:39.342Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/0c/eb/4fc8d0a7110eb5fc9cc161723a34a8a6c200ce3b4fbf681bc86feee22308/charset_normalizer-3.4.7-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:eca9705049ad3c7345d574e3510665cb2cf844c2f2dcfe675332677f081cbd46", size = 311328, upload-time = "2026-04-02T09:26:24.331Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/e3/0fadc706008ac9d7b9b5be6dc767c05f9d3e5df51744ce4cc9605de7b9f4/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6178f72c5508bfc5fd446a5905e698c6212932f25bcdd4b47a757a50605a90e2", size = 208061, upload-time = "2026-04-02T09:26:25.568Z" },
-    { url = "https://files.pythonhosted.org/packages/42/f0/3dd1045c47f4a4604df85ec18ad093912ae1344ac706993aff91d38773a2/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e1421b502d83040e6d7fb2fb18dff63957f720da3d77b2fbd3187ceb63755d7b", size = 229031, upload-time = "2026-04-02T09:26:26.865Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/67/675a46eb016118a2fbde5a277a5d15f4f69d5f3f5f338e5ee2f8948fcf43/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:edac0f1ab77644605be2cbba52e6b7f630731fc42b34cb0f634be1a6eface56a", size = 225239, upload-time = "2026-04-02T09:26:28.044Z" },
-    { url = "https://files.pythonhosted.org/packages/4b/f8/d0118a2f5f23b02cd166fa385c60f9b0d4f9194f574e2b31cef350ad7223/charset_normalizer-3.4.7-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5649fd1c7bade02f320a462fdefd0b4bd3ce036065836d4f42e0de958038e116", size = 216589, upload-time = "2026-04-02T09:26:29.239Z" },
-    { url = "https://files.pythonhosted.org/packages/b1/f1/6d2b0b261b6c4ceef0fcb0d17a01cc5bc53586c2d4796fa04b5c540bc13d/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:203104ed3e428044fd943bc4bf45fa73c0730391f9621e37fe39ecf477b128cb", size = 202733, upload-time = "2026-04-02T09:26:30.5Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/c0/7b1f943f7e87cc3db9626ba17807d042c38645f0a1d4415c7a14afb5591f/charset_normalizer-3.4.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:298930cec56029e05497a76988377cbd7457ba864beeea92ad7e844fe74cd1f1", size = 212652, upload-time = "2026-04-02T09:26:31.709Z" },
-    { url = "https://files.pythonhosted.org/packages/38/dd/5a9ab159fe45c6e72079398f277b7d2b523e7f716acc489726115a910097/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:708838739abf24b2ceb208d0e22403dd018faeef86ddac04319a62ae884c4f15", size = 211229, upload-time = "2026-04-02T09:26:33.282Z" },
-    { url = "https://files.pythonhosted.org/packages/d5/ff/531a1cad5ca855d1c1a8b69cb71abfd6d85c0291580146fda7c82857caa1/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:0f7eb884681e3938906ed0434f20c63046eacd0111c4ba96f27b76084cd679f5", size = 203552, upload-time = "2026-04-02T09:26:34.845Z" },
-    { url = "https://files.pythonhosted.org/packages/c1/4c/a5fb52d528a8ca41f7598cb619409ece30a169fbdf9cdce592e53b46c3a6/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:4dc1e73c36828f982bfe79fadf5919923f8a6f4df2860804db9a98c48824ce8d", size = 230806, upload-time = "2026-04-02T09:26:36.152Z" },
-    { url = "https://files.pythonhosted.org/packages/59/7a/071feed8124111a32b316b33ae4de83d36923039ef8cf48120266844285b/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:aed52fea0513bac0ccde438c188c8a471c4e0f457c2dd20cdbf6ea7a450046c7", size = 212316, upload-time = "2026-04-02T09:26:37.672Z" },
-    { url = "https://files.pythonhosted.org/packages/fd/35/f7dba3994312d7ba508e041eaac39a36b120f32d4c8662b8814dab876431/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:fea24543955a6a729c45a73fe90e08c743f0b3334bbf3201e6c4bc1b0c7fa464", size = 227274, upload-time = "2026-04-02T09:26:38.93Z" },
-    { url = "https://files.pythonhosted.org/packages/8a/2d/a572df5c9204ab7688ec1edc895a73ebded3b023bb07364710b05dd1c9be/charset_normalizer-3.4.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb6d88045545b26da47aa879dd4a89a71d1dce0f0e549b1abcb31dfe4a8eac49", size = 218468, upload-time = "2026-04-02T09:26:40.17Z" },
-    { url = "https://files.pythonhosted.org/packages/86/eb/890922a8b03a568ca2f336c36585a4713c55d4d67bf0f0c78924be6315ca/charset_normalizer-3.4.7-cp312-cp312-win32.whl", hash = "sha256:2257141f39fe65a3fdf38aeccae4b953e5f3b3324f4ff0daf9f15b8518666a2c", size = 148460, upload-time = "2026-04-02T09:26:41.416Z" },
-    { url = "https://files.pythonhosted.org/packages/35/d9/0e7dffa06c5ab081f75b1b786f0aefc88365825dfcd0ac544bdb7b2b6853/charset_normalizer-3.4.7-cp312-cp312-win_amd64.whl", hash = "sha256:5ed6ab538499c8644b8a3e18debabcd7ce684f3fa91cf867521a7a0279cab2d6", size = 159330, upload-time = "2026-04-02T09:26:42.554Z" },
-    { url = "https://files.pythonhosted.org/packages/9e/5d/481bcc2a7c88ea6b0878c299547843b2521ccbc40980cb406267088bc701/charset_normalizer-3.4.7-cp312-cp312-win_arm64.whl", hash = "sha256:56be790f86bfb2c98fb742ce566dfb4816e5a83384616ab59c49e0604d49c51d", size = 147828, upload-time = "2026-04-02T09:26:44.075Z" },
-    { url = "https://files.pythonhosted.org/packages/c1/3b/66777e39d3ae1ddc77ee606be4ec6d8cbd4c801f65e5a1b6f2b11b8346dd/charset_normalizer-3.4.7-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f496c9c3cc02230093d8330875c4c3cdfc3b73612a5fd921c65d39cbcef08063", size = 309627, upload-time = "2026-04-02T09:26:45.198Z" },
-    { url = "https://files.pythonhosted.org/packages/2e/4e/b7f84e617b4854ade48a1b7915c8ccfadeba444d2a18c291f696e37f0d3b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0ea948db76d31190bf08bd371623927ee1339d5f2a0b4b1b4a4439a65298703c", size = 207008, upload-time = "2026-04-02T09:26:46.824Z" },
-    { url = "https://files.pythonhosted.org/packages/c4/bb/ec73c0257c9e11b268f018f068f5d00aa0ef8c8b09f7753ebd5f2880e248/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a277ab8928b9f299723bc1a2dabb1265911b1a76341f90a510368ca44ad9ab66", size = 228303, upload-time = "2026-04-02T09:26:48.397Z" },
-    { url = "https://files.pythonhosted.org/packages/85/fb/32d1f5033484494619f701e719429c69b766bfc4dbc61aa9e9c8c166528b/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3bec022aec2c514d9cf199522a802bd007cd588ab17ab2525f20f9c34d067c18", size = 224282, upload-time = "2026-04-02T09:26:49.684Z" },
-    { url = "https://files.pythonhosted.org/packages/fa/07/330e3a0dda4c404d6da83b327270906e9654a24f6c546dc886a0eb0ffb23/charset_normalizer-3.4.7-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e044c39e41b92c845bc815e5ae4230804e8e7bc29e399b0437d64222d92809dd", size = 215595, upload-time = "2026-04-02T09:26:50.915Z" },
-    { url = "https://files.pythonhosted.org/packages/e3/7c/fc890655786e423f02556e0216d4b8c6bcb6bdfa890160dc66bf52dee468/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:f495a1652cf3fbab2eb0639776dad966c2fb874d79d87ca07f9d5f059b8bd215", size = 201986, upload-time = "2026-04-02T09:26:52.197Z" },
-    { url = "https://files.pythonhosted.org/packages/d8/97/bfb18b3db2aed3b90cf54dc292ad79fdd5ad65c4eae454099475cbeadd0d/charset_normalizer-3.4.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e712b419df8ba5e42b226c510472b37bd57b38e897d3eca5e8cfd410a29fa859", size = 211711, upload-time = "2026-04-02T09:26:53.49Z" },
-    { url = "https://files.pythonhosted.org/packages/6f/a5/a581c13798546a7fd557c82614a5c65a13df2157e9ad6373166d2a3e645d/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7804338df6fcc08105c7745f1502ba68d900f45fd770d5bdd5288ddccb8a42d8", size = 210036, upload-time = "2026-04-02T09:26:54.975Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/bf/b3ab5bcb478e4193d517644b0fb2bf5497fbceeaa7a1bc0f4d5b50953861/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:481551899c856c704d58119b5025793fa6730adda3571971af568f66d2424bb5", size = 202998, upload-time = "2026-04-02T09:26:56.303Z" },
-    { url = "https://files.pythonhosted.org/packages/e7/4e/23efd79b65d314fa320ec6017b4b5834d5c12a58ba4610aa353af2e2f577/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f59099f9b66f0d7145115e6f80dd8b1d847176df89b234a5a6b3f00437aa0832", size = 230056, upload-time = "2026-04-02T09:26:57.554Z" },
-    { url = "https://files.pythonhosted.org/packages/b9/9f/1e1941bc3f0e01df116e68dc37a55c4d249df5e6fa77f008841aef68264f/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:f59ad4c0e8f6bba240a9bb85504faa1ab438237199d4cce5f622761507b8f6a6", size = 211537, upload-time = "2026-04-02T09:26:58.843Z" },
-    { url = "https://files.pythonhosted.org/packages/80/0f/088cbb3020d44428964a6c97fe1edfb1b9550396bf6d278330281e8b709c/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:3dedcc22d73ec993f42055eff4fcfed9318d1eeb9a6606c55892a26964964e48", size = 226176, upload-time = "2026-04-02T09:27:00.437Z" },
-    { url = "https://files.pythonhosted.org/packages/6a/9f/130394f9bbe06f4f63e22641d32fc9b202b7e251c9aef4db044324dac493/charset_normalizer-3.4.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:64f02c6841d7d83f832cd97ccf8eb8a906d06eb95d5276069175c696b024b60a", size = 217723, upload-time = "2026-04-02T09:27:02.021Z" },
-    { url = "https://files.pythonhosted.org/packages/73/55/c469897448a06e49f8fa03f6caae97074fde823f432a98f979cc42b90e69/charset_normalizer-3.4.7-cp313-cp313-win32.whl", hash = "sha256:4042d5c8f957e15221d423ba781e85d553722fc4113f523f2feb7b188cc34c5e", size = 148085, upload-time = "2026-04-02T09:27:03.192Z" },
-    { url = "https://files.pythonhosted.org/packages/5d/78/1b74c5bbb3f99b77a1715c91b3e0b5bdb6fe302d95ace4f5b1bec37b0167/charset_normalizer-3.4.7-cp313-cp313-win_amd64.whl", hash = "sha256:3946fa46a0cf3e4c8cb1cc52f56bb536310d34f25f01ca9b6c16afa767dab110", size = 158819, upload-time = "2026-04-02T09:27:04.454Z" },
-    { url = "https://files.pythonhosted.org/packages/68/86/46bd42279d323deb8687c4a5a811fd548cb7d1de10cf6535d099877a9a9f/charset_normalizer-3.4.7-cp313-cp313-win_arm64.whl", hash = "sha256:80d04837f55fc81da168b98de4f4b797ef007fc8a79ab71c6ec9bc4dd662b15b", size = 147915, upload-time = "2026-04-02T09:27:05.971Z" },
-    { url = "https://files.pythonhosted.org/packages/97/c8/c67cb8c70e19ef1960b97b22ed2a1567711de46c4ddf19799923adc836c2/charset_normalizer-3.4.7-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:c36c333c39be2dbca264d7803333c896ab8fa7d4d6f0ab7edb7dfd7aea6e98c0", size = 309234, upload-time = "2026-04-02T09:27:07.194Z" },
-    { url = "https://files.pythonhosted.org/packages/99/85/c091fdee33f20de70d6c8b522743b6f831a2f1cd3ff86de4c6a827c48a76/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c2aed2e5e41f24ea8ef1590b8e848a79b56f3a5564a65ceec43c9d692dc7d8a", size = 208042, upload-time = "2026-04-02T09:27:08.749Z" },
-    { url = "https://files.pythonhosted.org/packages/87/1c/ab2ce611b984d2fd5d86a5a8a19c1ae26acac6bad967da4967562c75114d/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:54523e136b8948060c0fa0bc7b1b50c32c186f2fceee897a495406bb6e311d2b", size = 228706, upload-time = "2026-04-02T09:27:09.951Z" },
-    { url = "https://files.pythonhosted.org/packages/a8/29/2b1d2cb00bf085f59d29eb773ce58ec2d325430f8c216804a0a5cd83cbca/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:715479b9a2802ecac752a3b0efa2b0b60285cf962ee38414211abdfccc233b41", size = 224727, upload-time = "2026-04-02T09:27:11.175Z" },
-    { url = "https://files.pythonhosted.org/packages/47/5c/032c2d5a07fe4d4855fea851209cca2b6f03ebeb6d4e3afdb3358386a684/charset_normalizer-3.4.7-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bd6c2a1c7573c64738d716488d2cdd3c00e340e4835707d8fdb8dc1a66ef164e", size = 215882, upload-time = "2026-04-02T09:27:12.446Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/c2/356065d5a8b78ed04499cae5f339f091946a6a74f91e03476c33f0ab7100/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_armv7l.whl", hash = "sha256:c45e9440fb78f8ddabcf714b68f936737a121355bf59f3907f4e17721b9d1aae", size = 200860, upload-time = "2026-04-02T09:27:13.721Z" },
-    { url = "https://files.pythonhosted.org/packages/0c/cd/a32a84217ced5039f53b29f460962abb2d4420def55afabe45b1c3c7483d/charset_normalizer-3.4.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3534e7dcbdcf757da6b85a0bbf5b6868786d5982dd959b065e65481644817a18", size = 211564, upload-time = "2026-04-02T09:27:15.272Z" },
-    { url = "https://files.pythonhosted.org/packages/44/86/58e6f13ce26cc3b8f4a36b94a0f22ae2f00a72534520f4ae6857c4b81f89/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e8ac484bf18ce6975760921bb6148041faa8fef0547200386ea0b52b5d27bf7b", size = 211276, upload-time = "2026-04-02T09:27:16.834Z" },
-    { url = "https://files.pythonhosted.org/packages/8f/fe/d17c32dc72e17e155e06883efa84514ca375f8a528ba2546bee73fc4df81/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:a5fe03b42827c13cdccd08e6c0247b6a6d4b5e3cdc53fd1749f5896adcdc2356", size = 201238, upload-time = "2026-04-02T09:27:18.229Z" },
-    { url = "https://files.pythonhosted.org/packages/6a/29/f33daa50b06525a237451cdb6c69da366c381a3dadcd833fa5676bc468b3/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:2d6eb928e13016cea4f1f21d1e10c1cebd5a421bc57ddf5b1142ae3f86824fab", size = 230189, upload-time = "2026-04-02T09:27:19.445Z" },
-    { url = "https://files.pythonhosted.org/packages/b6/6e/52c84015394a6a0bdcd435210a7e944c5f94ea1055f5cc5d56c5fe368e7b/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:e74327fb75de8986940def6e8dee4f127cc9752bee7355bb323cc5b2659b6d46", size = 211352, upload-time = "2026-04-02T09:27:20.79Z" },
-    { url = "https://files.pythonhosted.org/packages/8c/d7/4353be581b373033fb9198bf1da3cf8f09c1082561e8e922aa7b39bf9fe8/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:d6038d37043bced98a66e68d3aa2b6a35505dc01328cd65217cefe82f25def44", size = 227024, upload-time = "2026-04-02T09:27:22.063Z" },
-    { url = "https://files.pythonhosted.org/packages/30/45/99d18aa925bd1740098ccd3060e238e21115fffbfdcb8f3ece837d0ace6c/charset_normalizer-3.4.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:7579e913a5339fb8fa133f6bbcfd8e6749696206cf05acdbdca71a1b436d8e72", size = 217869, upload-time = "2026-04-02T09:27:23.486Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/05/5ee478aa53f4bb7996482153d4bfe1b89e0f087f0ab6b294fcf92d595873/charset_normalizer-3.4.7-cp314-cp314-win32.whl", hash = "sha256:5b77459df20e08151cd6f8b9ef8ef1f961ef73d85c21a555c7eed5b79410ec10", size = 148541, upload-time = "2026-04-02T09:27:25.146Z" },
-    { url = "https://files.pythonhosted.org/packages/48/77/72dcb0921b2ce86420b2d79d454c7022bf5be40202a2a07906b9f2a35c97/charset_normalizer-3.4.7-cp314-cp314-win_amd64.whl", hash = "sha256:92a0a01ead5e668468e952e4238cccd7c537364eb7d851ab144ab6627dbbe12f", size = 159634, upload-time = "2026-04-02T09:27:26.642Z" },
-    { url = "https://files.pythonhosted.org/packages/c6/a3/c2369911cd72f02386e4e340770f6e158c7980267da16af8f668217abaa0/charset_normalizer-3.4.7-cp314-cp314-win_arm64.whl", hash = "sha256:67f6279d125ca0046a7fd386d01b311c6363844deac3e5b069b514ba3e63c246", size = 148384, upload-time = "2026-04-02T09:27:28.271Z" },
-    { url = "https://files.pythonhosted.org/packages/94/09/7e8a7f73d24dba1f0035fbbf014d2c36828fc1bf9c88f84093e57d315935/charset_normalizer-3.4.7-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:effc3f449787117233702311a1b7d8f59cba9ced946ba727bdc329ec69028e24", size = 330133, upload-time = "2026-04-02T09:27:29.474Z" },
-    { url = "https://files.pythonhosted.org/packages/8d/da/96975ddb11f8e977f706f45cddd8540fd8242f71ecdb5d18a80723dcf62c/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fbccdc05410c9ee21bbf16a35f4c1d16123dcdeb8a1d38f33654fa21d0234f79", size = 216257, upload-time = "2026-04-02T09:27:30.793Z" },
-    { url = "https://files.pythonhosted.org/packages/e5/e8/1d63bf8ef2d388e95c64b2098f45f84758f6d102a087552da1485912637b/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:733784b6d6def852c814bce5f318d25da2ee65dd4839a0718641c696e09a2960", size = 234851, upload-time = "2026-04-02T09:27:32.44Z" },
-    { url = "https://files.pythonhosted.org/packages/9b/40/e5ff04233e70da2681fa43969ad6f66ca5611d7e669be0246c4c7aaf6dc8/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a89c23ef8d2c6b27fd200a42aa4ac72786e7c60d40efdc76e6011260b6e949c4", size = 233393, upload-time = "2026-04-02T09:27:34.03Z" },
-    { url = "https://files.pythonhosted.org/packages/be/c1/06c6c49d5a5450f76899992f1ee40b41d076aee9279b49cf9974d2f313d5/charset_normalizer-3.4.7-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6c114670c45346afedc0d947faf3c7f701051d2518b943679c8ff88befe14f8e", size = 223251, upload-time = "2026-04-02T09:27:35.369Z" },
-    { url = "https://files.pythonhosted.org/packages/2b/9f/f2ff16fb050946169e3e1f82134d107e5d4ae72647ec8a1b1446c148480f/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:a180c5e59792af262bf263b21a3c49353f25945d8d9f70628e73de370d55e1e1", size = 206609, upload-time = "2026-04-02T09:27:36.661Z" },
-    { url = "https://files.pythonhosted.org/packages/69/d5/a527c0cd8d64d2eab7459784fb4169a0ac76e5a6fc5237337982fd61347e/charset_normalizer-3.4.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:3c9a494bc5ec77d43cea229c4f6db1e4d8fe7e1bbffa8b6f0f0032430ff8ab44", size = 220014, upload-time = "2026-04-02T09:27:38.019Z" },
-    { url = "https://files.pythonhosted.org/packages/7e/80/8a7b8104a3e203074dc9aa2c613d4b726c0e136bad1cc734594b02867972/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8d828b6667a32a728a1ad1d93957cdf37489c57b97ae6c4de2860fa749b8fc1e", size = 218979, upload-time = "2026-04-02T09:27:39.37Z" },
-    { url = "https://files.pythonhosted.org/packages/02/9a/b759b503d507f375b2b5c153e4d2ee0a75aa215b7f2489cf314f4541f2c0/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_armv7l.whl", hash = "sha256:cf1493cd8607bec4d8a7b9b004e699fcf8f9103a9284cc94962cb73d20f9d4a3", size = 209238, upload-time = "2026-04-02T09:27:40.722Z" },
-    { url = "https://files.pythonhosted.org/packages/c2/4e/0f3f5d47b86bdb79256e7290b26ac847a2832d9a4033f7eb2cd4bcf4bb5b/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:0c96c3b819b5c3e9e165495db84d41914d6894d55181d2d108cc1a69bfc9cce0", size = 236110, upload-time = "2026-04-02T09:27:42.33Z" },
-    { url = "https://files.pythonhosted.org/packages/96/23/bce28734eb3ed2c91dcf93abeb8a5cf393a7b2749725030bb630e554fdd8/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:752a45dc4a6934060b3b0dab47e04edc3326575f82be64bc4fc293914566503e", size = 219824, upload-time = "2026-04-02T09:27:43.924Z" },
-    { url = "https://files.pythonhosted.org/packages/2c/6f/6e897c6984cc4d41af319b077f2f600fc8214eb2fe2d6bcb79141b882400/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:8778f0c7a52e56f75d12dae53ae320fae900a8b9b4164b981b9c5ce059cd1fcb", size = 233103, upload-time = "2026-04-02T09:27:45.348Z" },
-    { url = "https://files.pythonhosted.org/packages/76/22/ef7bd0fe480a0ae9b656189ec00744b60933f68b4f42a7bb06589f6f576a/charset_normalizer-3.4.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:ce3412fbe1e31eb81ea42f4169ed94861c56e643189e1e75f0041f3fe7020abe", size = 225194, upload-time = "2026-04-02T09:27:46.706Z" },
-    { url = "https://files.pythonhosted.org/packages/c5/a7/0e0ab3e0b5bc1219bd80a6a0d4d72ca74d9250cb2382b7c699c147e06017/charset_normalizer-3.4.7-cp314-cp314t-win32.whl", hash = "sha256:c03a41a8784091e67a39648f70c5f97b5b6a37f216896d44d2cdcb82615339a0", size = 159827, upload-time = "2026-04-02T09:27:48.053Z" },
-    { url = "https://files.pythonhosted.org/packages/7a/1d/29d32e0fb40864b1f878c7f5a0b343ae676c6e2b271a2d55cc3a152391da/charset_normalizer-3.4.7-cp314-cp314t-win_amd64.whl", hash = "sha256:03853ed82eeebbce3c2abfdbc98c96dc205f32a79627688ac9a27370ea61a49c", size = 174168, upload-time = "2026-04-02T09:27:49.795Z" },
-    { url = "https://files.pythonhosted.org/packages/de/32/d92444ad05c7a6e41fb2036749777c163baf7a0301a040cb672d6b2b1ae9/charset_normalizer-3.4.7-cp314-cp314t-win_arm64.whl", hash = "sha256:c35abb8bfff0185efac5878da64c45dafd2b37fb0383add1be155a763c1f083d", size = 153018, upload-time = "2026-04-02T09:27:51.116Z" },
-    { url = "https://files.pythonhosted.org/packages/db/8f/61959034484a4a7c527811f4721e75d02d653a35afb0b6054474d8185d4c/charset_normalizer-3.4.7-py3-none-any.whl", hash = "sha256:3dce51d0f5e7951f8bb4900c257dad282f49190fdbebecd4ba99bcc41fef404d", size = 61958, upload-time = "2026-04-02T09:28:37.794Z" },
-]
-
 [[package]]
 name = "click"
 version = "8.3.2"
@ -481,21 +408,22 @@ wheels = [

 [[package]]
 name = "huggingface-hub"
-version = "0.36.2"
+version = "1.11.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "filelock" },
    { name = "fsspec" },
-    { name = "hf-xet", marker = "platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
+    { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" },
+    { name = "httpx" },
    { name = "packaging" },
    { name = "pyyaml" },
-    { name = "requests" },
    { name = "tqdm" },
+    { name = "typer" },
    { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/7c/b7/8cb61d2eece5fb05a83271da168186721c450eb74e3c31f7ef3169fa475b/huggingface_hub-0.36.2.tar.gz", hash = "sha256:1934304d2fb224f8afa3b87007d58501acfda9215b334eed53072dd5e815ff7a", size = 649782, upload-time = "2026-02-06T09:24:13.098Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/89/e7aa12d8a6b9259bed10671abb25ae6fa437c0f88a86ecbf59617bae7759/huggingface_hub-1.11.0.tar.gz", hash = "sha256:15fb3713c7f9cdff7b808a94fd91664f661ab142796bb48c9cd9493e8d166278", size = 761749, upload-time = "2026-04-16T13:07:39.73Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/a8/af/48ac8483240de756d2438c380746e7130d1c6f75802ef22f3c6d49982787/huggingface_hub-0.36.2-py3-none-any.whl", hash = "sha256:48f0c8eac16145dfce371e9d2d7772854a4f591bcb56c9cf548accf531d54270", size = 566395, upload-time = "2026-02-06T09:24:11.133Z" },
+    { url = "https://files.pythonhosted.org/packages/37/02/4f3f8997d1ea7fe0146b343e5e14bd065fa87af790d07e5576d31b31cc18/huggingface_hub-1.11.0-py3-none-any.whl", hash = "sha256:42a6de0afbfeb5e022222d36398f029679db4eb4778801aafda32257ae9131ab", size = 645499, upload-time = "2026-04-16T13:07:37.716Z" },
 ]

 [[package]]
@ -566,8 +494,8 @@ requires-dist = [
    { name = "python-magic", specifier = ">=0.4.27" },
    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8" },
    { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.36" },
-    { name = "surya-ocr", marker = "extra == 'ocr'", specifier = ">=0.17,<0.18" },
-    { name = "torch", marker = "extra == 'ocr'", specifier = ">=2.7" },
+    { name = "surya-ocr", marker = "extra == 'ocr'", specifier = ">=0.9" },
+    { name = "torch", marker = "extra == 'ocr'", specifier = ">=2.4" },
    { name = "uvicorn", extras = ["standard"], specifier = ">=0.32" },
 ]
 provides-extras = ["ocr", "dev"]
@ -665,6 +593,18 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/68/a5/19d7aaa7e433713ffe881df33705925a196afb9532efc8475d26593921a6/mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77", size = 78503, upload-time = "2026-04-14T20:19:53.233Z" },
 ]

+[[package]]
+name = "markdown-it-py"
+version = "4.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mdurl" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" },
+]
+
 [[package]]
 name = "markupsafe"
 version = "3.0.3"
@ -728,6 +668,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
 ]

+[[package]]
+name = "mdurl"
+version = "0.1.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" },
+]
+
 [[package]]
 name = "mpmath"
 version = "1.3.0"
@ -1485,18 +1434,16 @@ wheels = [
 ]

 [[package]]
-name = "requests"
-version = "2.33.1"
+name = "rich"
+version = "15.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "certifi" },
-    { name = "charset-normalizer" },
-    { name = "idna" },
-    { name = "urllib3" },
+    { name = "markdown-it-py" },
+    { name = "pygments" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/5f/a4/98b9c7c6428a668bf7e42ebb7c79d576a1c3c1e3ae2d47e674b468388871/requests-2.33.1.tar.gz", hash = "sha256:18817f8c57c6263968bc123d237e3b8b08ac046f5456bd1e307ee8f4250d3517", size = 134120, upload-time = "2026-03-30T16:09:15.531Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/c0/8f/0722ca900cc807c13a6a0c696dacf35430f72e0ec571c4275d2371fca3e9/rich-15.0.0.tar.gz", hash = "sha256:edd07a4824c6b40189fb7ac9bc4c52536e9780fbbfbddf6f1e2502c31b068c36", size = 230680, upload-time = "2026-04-12T08:24:00.75Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/d7/8e/7540e8a2036f79a125c1d2ebadf69ed7901608859186c856fa0388ef4197/requests-2.33.1-py3-none-any.whl", hash = "sha256:4e6d1ef462f3626a1f0a0a9c42dd93c63bad33f9f1c1937509b8c5c8718ab56a", size = 64947, upload-time = "2026-03-30T16:09:13.83Z" },
+    { url = "https://files.pythonhosted.org/packages/82/3b/64d4899d73f91ba49a8c18a8ff3f0ea8f1c1d75481760df8c68ef5235bf5/rich-15.0.0-py3-none-any.whl", hash = "sha256:33bd4ef74232fb73fe9279a257718407f169c09b78a87ad3d296f548e27de0bb", size = 310654, upload-time = "2026-04-12T08:24:02.83Z" },
 ]

 [[package]]
@ -1555,6 +1502,15 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/e1/e3/c164c88b2e5ce7b24d667b9bd83589cf4f3520d97cad01534cd3c4f55fdb/setuptools-81.0.0-py3-none-any.whl", hash = "sha256:fdd925d5c5d9f62e4b74b30d6dd7828ce236fd6ed998a08d81de62ce5a6310d6", size = 1062021, upload-time = "2026-02-06T21:10:37.175Z" },
 ]

+[[package]]
+name = "shellingham"
+version = "1.5.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
+]
+
 [[package]]
 name = "six"
 version = "1.17.0"
@ -1747,23 +1703,22 @@ wheels = [

 [[package]]
 name = "transformers"
-version = "4.57.6"
+version = "5.5.4"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "filelock" },
    { name = "huggingface-hub" },
    { name = "numpy" },
    { name = "packaging" },
    { name = "pyyaml" },
    { name = "regex" },
-    { name = "requests" },
    { name = "safetensors" },
    { name = "tokenizers" },
    { name = "tqdm" },
+    { name = "typer" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/c4/35/67252acc1b929dc88b6602e8c4a982e64f31e733b804c14bc24b47da35e6/transformers-4.57.6.tar.gz", hash = "sha256:55e44126ece9dc0a291521b7e5492b572e6ef2766338a610b9ab5afbb70689d3", size = 10134912, upload-time = "2026-01-16T10:38:39.284Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a5/1e/1e244ab2ab50a863e6b52cc55761910567fa532b69a6740f6e99c5fdbd98/transformers-5.5.4.tar.gz", hash = "sha256:2e67cadba81fc7608cc07c4dd54f524820bc3d95b1cabd0ef3db7733c4f8b82e", size = 8227649, upload-time = "2026-04-13T16:55:55.181Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/03/b8/e484ef633af3887baeeb4b6ad12743363af7cce68ae51e938e00aaa0529d/transformers-4.57.6-py3-none-any.whl", hash = "sha256:4c9e9de11333ddfe5114bc872c9f370509198acf0b87a832a0ab9458e2bd0550", size = 11993498, upload-time = "2026-01-16T10:38:31.289Z" },
+    { url = "https://files.pythonhosted.org/packages/29/fb/162a66789c65e5afa3b051309240c26bf37fbc8fea285b4546ae747995a2/transformers-5.5.4-py3-none-any.whl", hash = "sha256:0bd6281b82966fe5a7a16f553ea517a9db1dee6284d7cb224dfd88fc0dd1c167", size = 10236696, upload-time = "2026-04-13T16:55:51.497Z" },
 ]

 [[package]]
@ -1783,6 +1738,21 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/f6/56/6113c23ff46c00aae423333eb58b3e60bdfe9179d542781955a5e1514cb3/triton-3.6.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:46bd1c1af4b6704e554cad2eeb3b0a6513a980d470ccfa63189737340c7746a7", size = 188397994, upload-time = "2026-01-20T16:01:14.236Z" },
 ]

+[[package]]
+name = "typer"
+version = "0.24.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "annotated-doc" },
+    { name = "click" },
+    { name = "rich" },
+    { name = "shellingham" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" },
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.15.0"
@ -1804,15 +1774,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
 ]

-[[package]]
-name = "urllib3"
-version = "2.6.3"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/c7/24/5f1b3bdffd70275f6661c76461e25f024d5a38a46f04aaca912426a2b1d3/urllib3-2.6.3.tar.gz", hash = "sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed", size = 435556, upload-time = "2026-01-07T16:24:43.925Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/39/08/aaaad47bc4e9dc8c725e68f9d04865dbcb2052843ff09c97b08904852d84/urllib3-2.6.3-py3-none-any.whl", hash = "sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4", size = 131584, upload-time = "2026-01-07T16:24:42.685Z" },
-]
-
 [[package]]
 name = "uvicorn"
 version = "0.44.0"