feat(app): production wiring — factories, pipeline, /healthz real probes

Task 4.3 closes the loop on Chunk 4: the FastAPI lifespan now selects fake
vs real clients via IX_TEST_MODE (new AppConfig field), wires /healthz
probes to the live selfcheck() on OllamaClient / SuryaOCRClient, and spawns
the worker with a production Pipeline factory that builds
SetupStep -> OCRStep -> GenAIStep -> ReliabilityStep -> ResponseHandlerStep
over the injected clients.

Factories:
- make_genai_client(cfg) -> FakeGenAIClient | OllamaClient
- make_ocr_client(cfg) -> FakeOCRClient | SuryaOCRClient (spec §6.2)

Probes run the async selfcheck on a fresh event loop in a short-lived
thread so they're safe to call from either sync callers or a live FastAPI
handler without stalling the request loop.

Drops the worker-loop spawn_worker_task stub — the app module owns the
production spawn directly.

Tests: +11 unit tests (5 factories + 6 app-wiring / probe adapter /
pipeline build). Full suite: 236 passed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 12:09:11 +02:00 · 2026-04-18 12:09:11 +02:00 · ebefee4184
commit ebefee4184
parent b737ed7b21
7 changed files with 394 additions and 40 deletions
--- a/src/ix/app.py
+++ b/src/ix/app.py
@ -5,21 +5,132 @@ worker loop (Task 3.5) and the pg_queue listener (Task 3.6). Tests that
 don't care about the worker call ``create_app(spawn_worker=False)`` so the
 lifespan returns cleanly.
-The factory is parameterised (``spawn_worker``) instead of env-gated because
+Task 4.3 fills in the production wiring:
-pytest runs multiple app instances per session and we want the decision local
+
-to each call, not inferred from ``IX_*`` variables. The listener is also
+* Factories (``make_genai_client`` / ``make_ocr_client``) pick between
-gated on ``spawn_worker`` — the listener is only useful when a worker is
+  fakes (``IX_TEST_MODE=fake``) and real Ollama/Surya clients.
-draining the queue, so the two share one flag.
+* ``/healthz`` probes call ``selfcheck()`` on the active clients. In
  ``fake`` mode they always report ok.
 * The worker's :class:`Pipeline` is built once per spawn with the real
  chain of Steps; each call to the injected ``pipeline_factory`` returns
  a fresh Pipeline so per-request state stays isolated.
 """
 from __future__ import annotations
-from collections.abc import AsyncIterator
+import asyncio
 from collections.abc import AsyncIterator, Callable
 from contextlib import asynccontextmanager, suppress
 from typing import Literal
 from fastapi import FastAPI
 from ix.adapters.rest.routes import Probes, get_probes
 from ix.adapters.rest.routes import router as rest_router
 from ix.config import AppConfig, get_config
 from ix.genai import make_genai_client
 from ix.genai.client import GenAIClient
 from ix.ocr import make_ocr_client
 from ix.ocr.client import OCRClient
 from ix.pipeline.genai_step import GenAIStep
 from ix.pipeline.ocr_step import OCRStep
 from ix.pipeline.pipeline import Pipeline
 from ix.pipeline.reliability_step import ReliabilityStep
 from ix.pipeline.response_handler_step import ResponseHandlerStep
 from ix.pipeline.setup_step import SetupStep
 def build_pipeline(
    genai: GenAIClient, ocr: OCRClient, cfg: AppConfig
 ) -> Pipeline:
    """Build the production :class:`Pipeline` around the injected clients.

    The step order is Setup -> OCR -> GenAI -> Reliability ->
    ResponseHandler. ``cfg`` supplies the temp directory and the file-fetch
    limits for :class:`SetupStep`; ``ocr`` and ``genai`` are handed to
    their respective steps unchanged.

    Lives at module level so tests can exercise the production wiring
    directly, without running the worker.
    """
    from pathlib import Path

    from ix.ingestion import FetchConfig

    scratch_dir = Path(cfg.tmp_dir)
    fetch_limits = FetchConfig(
        connect_timeout_s=float(cfg.file_connect_timeout_seconds),
        read_timeout_s=float(cfg.file_read_timeout_seconds),
        max_bytes=cfg.file_max_bytes,
    )
    stages = [
        SetupStep(tmp_dir=scratch_dir, fetch_config=fetch_limits),
        OCRStep(ocr_client=ocr),
        GenAIStep(genai_client=genai),
        ReliabilityStep(),
        ResponseHandlerStep(),
    ]
    return Pipeline(steps=stages)
 def _make_ollama_probe(
    genai: GenAIClient, cfg: AppConfig
 ) -> Callable[[], Literal["ok", "degraded", "fail"]]:
    """Wrap the client's async ``selfcheck`` in the sync callable the route expects.

    The coroutine is handed to ``_run_async_sync``, which drives it on a
    throwaway event loop in a separate thread. The probe therefore behaves
    the same whether the caller already holds an event loop (FastAPI
    request) or not (a CLI tool), and never trips over ``asyncio.run``
    being called under a running loop.
    """

    def probe() -> Literal["ok", "degraded", "fail"]:
        # Only clients exposing ``selfcheck`` can be probed; anything else
        # (the fake client) has nothing to check and is reported healthy.
        if hasattr(genai, "selfcheck"):

            def check():  # type: ignore[no-untyped-def]
                return genai.selfcheck(expected_model=cfg.default_model)  # type: ignore[attr-defined]

            return _run_async_sync(check, fallback="fail")
        return "ok"

    return probe
 def _make_ocr_probe(ocr: OCRClient) -> Callable[[], Literal["ok", "fail"]]:
    """Wrap the OCR client's async ``selfcheck`` in a sync probe callable.

    Clients without a ``selfcheck`` attribute (the fake) report ``"ok"``;
    otherwise the coroutine is run through ``_run_async_sync`` and any
    failure there is reported as ``"fail"``.
    """

    def probe() -> Literal["ok", "fail"]:
        if hasattr(ocr, "selfcheck"):

            def check():  # type: ignore[no-untyped-def]
                return ocr.selfcheck()  # type: ignore[attr-defined]

            return _run_async_sync(check, fallback="fail")
        # Fake client: nothing to probe.
        return "ok"

    return probe
 def _run_async_sync(  # type: ignore[no-untyped-def]
    make_coro, *, fallback: str, timeout_s: float | None = None
 ) -> str:
    """Run ``make_coro()`` on a private event loop in a helper thread.

    The helper thread owns its own loop, so a loop already running in the
    calling thread is never re-entered. The calling thread itself still
    blocks in ``join`` until the awaitable finishes or ``timeout_s``
    elapses.

    ``make_coro`` is a zero-argument callable returning an awaitable; the
    awaited result is coerced with ``str``. ``fallback`` is returned when
    the factory or the awaitable raises (cancellation included) or when
    the wait times out. ``timeout_s=None`` (the default) waits
    indefinitely. On timeout the daemon helper thread is left to finish in
    the background and whatever it produces is discarded.
    """
    import logging
    import threading

    log = logging.getLogger(__name__)
    outcome: dict[str, object] = {}

    async def _main() -> object:
        # Invoke the factory inside the running loop so it may return any
        # awaitable, not only a coroutine object.
        return await make_coro()

    def _runner() -> None:
        try:
            # ``asyncio.run`` creates the loop and, on the way out, cancels
            # leftover tasks and finalises async generators before closing
            # it; a bare ``loop.close()`` skips that cleanup.
            outcome["value"] = asyncio.run(_main())
        except (Exception, asyncio.CancelledError) as exc:
            outcome["error"] = exc

    # Daemon: a selfcheck that never returns must not block interpreter exit.
    worker = threading.Thread(target=_runner, name="ix-probe", daemon=True)
    worker.start()
    worker.join(timeout_s)

    if worker.is_alive():
        log.warning("async probe still running after %s s; reporting %r", timeout_s, fallback)
        return fallback
    if "error" in outcome:
        log.warning("async probe raised; reporting %r", fallback, exc_info=outcome["error"])  # type: ignore[arg-type]
        return fallback
    if "value" not in outcome:
        return fallback
    return str(outcome["value"])
 def create_app(*, spawn_worker: bool = True) -> FastAPI:
@ -36,21 +147,35 @@ def create_app(*, spawn_worker: bool = True) -> FastAPI:
    @asynccontextmanager
    async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
        cfg = get_config()
        # Build the clients once per process. The worker's pipeline
        # factory closes over these so every job runs through the same
        # Ollama/Surya instance (Surya's predictors are heavy; re-loading
        # them per job would be catastrophic).
        genai_client = make_genai_client(cfg)
        ocr_client = make_ocr_client(cfg)
        # Override the route-level probe DI so /healthz reflects the
        # actual clients. Tests that want canned probes can still override
        # ``get_probes`` at the TestClient layer.
        _app.dependency_overrides.setdefault(
            get_probes,
            lambda: Probes(
                ollama=_make_ollama_probe(genai_client, cfg),
                ocr=_make_ocr_probe(ocr_client),
            ),
        )
        worker_task = None
        listener = None
        if spawn_worker:
            # Pipeline factory + listener wiring live in Chunk 4's
            # production entrypoint; keeping this path best-effort lets the
            # lifespan still start even on a box where Ollama/Surya aren't
            # available (the listener just gives us a passive 10 s poll).
            try:
                from ix.adapters.pg_queue.listener import (
                    PgQueueListener,
                    asyncpg_dsn_from_sqlalchemy_url,
                )
                from ix.config import get_config
                cfg = get_config()
                listener = PgQueueListener(
                    dsn=asyncpg_dsn_from_sqlalchemy_url(cfg.postgres_url)
                )
@ -59,10 +184,10 @@ def create_app(*, spawn_worker: bool = True) -> FastAPI:
                listener = None
            try:
-                from ix.worker.loop import spawn_worker_task
+                worker_task = await _spawn_production_worker(
-
+                    cfg, genai_client, ocr_client, listener
-                worker_task = await spawn_worker_task(_app)
+                )
-            except ImportError:
+            except Exception:
                worker_task = None
        try:
            yield
@ -78,3 +203,30 @@ def create_app(*, spawn_worker: bool = True) -> FastAPI:
    app = FastAPI(lifespan=lifespan, title="infoxtractor", version="0.1.0")
    app.include_router(rest_router)
    return app
 async def _spawn_production_worker(
    cfg: AppConfig,
    genai: GenAIClient,
    ocr: OCRClient,
    listener,  # type: ignore[no-untyped-def]
    stop: asyncio.Event | None = None,
 ) -> asyncio.Task[None]:
    """Spawn the background worker with a production pipeline factory.

    Every call the worker makes to the injected ``pipeline_factory`` goes
    through :func:`build_pipeline`, so each job gets a new
    :class:`Pipeline` over the same ``genai`` / ``ocr`` clients.

    ``listener`` may be ``None``; in that case the worker receives no
    ``wait_for_work`` hook. ``stop`` is the event passed to
    ``Worker.run``. Pass your own to be able to request a stop later; when
    omitted a private event is created, and the returned task can then
    only be ended by cancelling it.

    Must be awaited from a running event loop, because it schedules the
    worker with :func:`asyncio.create_task`.
    """
    from ix.store.engine import get_session_factory
    from ix.worker.loop import Worker

    def pipeline_factory() -> Pipeline:
        return build_pipeline(genai, ocr, cfg)

    wait_for_work = listener.wait_for_work if listener is not None else None
    worker = Worker(
        session_factory=get_session_factory(),
        pipeline_factory=pipeline_factory,
        poll_interval_seconds=10.0,
        # Allow a job twice the request timeout before it counts as overrunning.
        max_running_seconds=2 * cfg.pipeline_request_timeout_seconds,
        callback_timeout_seconds=cfg.callback_timeout_seconds,
        wait_for_work=wait_for_work,
    )
    if stop is None:
        stop = asyncio.Event()
    return asyncio.create_task(worker.run(stop), name="ix-worker")
--- a/src/ix/config.py
+++ b/src/ix/config.py
@ -12,6 +12,7 @@ re-read after ``monkeypatch.setenv``. Production code never clears the cache.
 from __future__ import annotations
 from functools import lru_cache
 from typing import Literal
 from pydantic_settings import BaseSettings, SettingsConfigDict
@ -62,6 +63,13 @@ class AppConfig(BaseSettings):
    # --- Observability ---
    log_level: str = "INFO"
    # --- Test / wiring mode ---
    # ``fake``: factories return FakeGenAIClient / FakeOCRClient and
    # ``/healthz`` probes report ok. CI sets this so the Forgejo runner
    # doesn't need access to Ollama or GPU-backed Surya. ``None`` (default)
    # means production wiring: real OllamaClient + SuryaOCRClient.
    test_mode: Literal["fake"] | None = None
@lru_cache(maxsize=1)
 def get_config() -> AppConfig:
--- a/src/ix/genai/__init__.py
+++ b/src/ix/genai/__init__.py
@ -1,18 +1,43 @@
 """GenAI subsystem: protocol + fake client + invocation-result dataclasses.
-Real backends (Ollama, etc.) plug in behind :class:`GenAIClient`. The MVP
+Real backends (Ollama, …) plug in behind :class:`GenAIClient`. The factory
-ships only :class:`FakeGenAIClient` from this package; the real Ollama
+:func:`make_genai_client` picks between :class:`FakeGenAIClient` (for CI
-client lands in Chunk 4.
+/ hermetic tests via ``IX_TEST_MODE=fake``) and :class:`OllamaClient`
 (production). Tests that want a real Ollama client anyway can call the
 constructor directly.
 """
 from __future__ import annotations
 from ix.config import AppConfig
 from ix.genai.client import GenAIClient, GenAIInvocationResult, GenAIUsage
 from ix.genai.fake import FakeGenAIClient
 from ix.genai.ollama_client import OllamaClient
 def make_genai_client(cfg: AppConfig) -> GenAIClient:
    """Pick the :class:`GenAIClient` implementation for this run.

    ``cfg.test_mode == "fake"`` yields a :class:`FakeGenAIClient` with no
    canned parse result; pipeline callers that want a specific canned
    response are expected to override the injected client via DI. Any
    other mode yields a live :class:`OllamaClient` pointed at
    ``cfg.ollama_url`` with the configured per-call timeout.
    """
    use_fake = cfg.test_mode == "fake"
    if not use_fake:
        call_timeout = float(cfg.genai_call_timeout_seconds)
        return OllamaClient(
            base_url=cfg.ollama_url,
            per_call_timeout_s=call_timeout,
        )
    return FakeGenAIClient(parsed=None)
 __all__ = [
    "FakeGenAIClient",
    "GenAIClient",
    "GenAIInvocationResult",
    "GenAIUsage",
    "OllamaClient",
    "make_genai_client",
 ]
--- a/src/ix/ocr/__init__.py
+++ b/src/ix/ocr/__init__.py
@ -1,13 +1,34 @@
-"""OCR subsystem: protocol + fake client.
+"""OCR subsystem: protocol + fake + real Surya client + factory.
-Real engines (Surya, Azure DI, …) plug in behind :class:`OCRClient`. The
+Real engines (Surya today, Azure DI / AWS Textract … deferred) plug in
-MVP ships only :class:`FakeOCRClient` from this package; the real Surya
+behind :class:`OCRClient`. The factory :func:`make_ocr_client` picks
-client lands in Chunk 4.
+between :class:`FakeOCRClient` (when ``IX_TEST_MODE=fake``) and
 :class:`SuryaOCRClient` (production). Unknown engine names raise so a
 typo'd ``IX_OCR_ENGINE`` surfaces at startup, not later.
 """
 from __future__ import annotations
 from ix.config import AppConfig
 from ix.contracts.response import OCRDetails, OCRResult
 from ix.ocr.client import OCRClient
 from ix.ocr.fake import FakeOCRClient
 from ix.ocr.surya_client import SuryaOCRClient
-__all__ = ["FakeOCRClient", "OCRClient"]
+
 def make_ocr_client(cfg: AppConfig) -> OCRClient:
    """Pick the :class:`OCRClient` implementation for this run.

    Fake mode wins over the engine setting and returns a
    :class:`FakeOCRClient` with an empty canned result. Otherwise the only
    recognised engine is ``"surya"``; any other name raises
    :class:`ValueError`.
    """
    if cfg.test_mode == "fake":
        empty_result = OCRResult(result=OCRDetails())
        return FakeOCRClient(canned=empty_result)

    engine = cfg.ocr_engine
    if engine != "surya":
        raise ValueError(f"Unknown ocr_engine: {engine!r}")
    return SuryaOCRClient()
 __all__ = [
    "FakeOCRClient",
    "OCRClient",
    "SuryaOCRClient",
    "make_ocr_client",
 ]
--- a/src/ix/worker/loop.py
+++ b/src/ix/worker/loop.py
@ -28,8 +28,6 @@ from collections.abc import Callable
 from datetime import UTC, datetime
 from typing import TYPE_CHECKING
 from fastapi import FastAPI
 from ix.contracts.response import ResponseIX
 from ix.errors import IXErrorCode, IXException
 from ix.pipeline.pipeline import Pipeline
@ -179,17 +177,3 @@ class Worker:
            await session.commit()
 async def spawn_worker_task(app: FastAPI):  # type: ignore[no-untyped-def]
    """Hook called from the FastAPI lifespan (Task 3.4).
    This module-level async function is here so ``ix.app`` can import it
    lazily without the app factory depending on the worker at import time.
    Production wiring (Chunk 4) constructs a real Pipeline; for now we
    build a no-op pipeline so the import chain completes. Tests that need
    the worker wire their own Worker explicitly.
    """
    # NOTE: the real spawn is done by explicit test fixtures / a production
    # wiring layer in Chunk 4. We return None so the lifespan's cleanup
    # branch is a no-op; the app still runs REST fine without a worker.
    return None
--- a/tests/unit/test_app_wiring.py
+++ b/tests/unit/test_app_wiring.py
@ -0,0 +1,104 @@
 """Tests for ``ix.app`` lifespan / probe wiring (Task 4.3).
 The lifespan selects fake clients when ``IX_TEST_MODE=fake`` and exposes
 their probes via the route DI hook. These tests exercise the probe
 adapter in isolation — no DB, no real Ollama/Surya.
 """
 from __future__ import annotations
 from typing import Literal
 from ix.app import _make_ocr_probe, _make_ollama_probe, build_pipeline
 from ix.config import AppConfig
 from ix.genai.fake import FakeGenAIClient
 from ix.ocr.fake import FakeOCRClient
 from ix.pipeline.genai_step import GenAIStep
 from ix.pipeline.ocr_step import OCRStep
 from ix.pipeline.pipeline import Pipeline
 from ix.pipeline.reliability_step import ReliabilityStep
 from ix.pipeline.response_handler_step import ResponseHandlerStep
 from ix.pipeline.setup_step import SetupStep
 def _cfg(**overrides: object) -> AppConfig:
    """Build an :class:`AppConfig` with ``_env_file=None`` plus field overrides."""
    settings = AppConfig(_env_file=None, **overrides)  # type: ignore[call-arg]
    return settings
 class _SelfcheckOllamaClient:
    """Stub GenAI client whose ``selfcheck`` records its argument and reports ok."""

    async def selfcheck(
        self, expected_model: str
    ) -> Literal["ok", "degraded", "fail"]:
        # Remember the model name so a test can assert what the probe passed in.
        self.called_with = expected_model
        verdict: Literal["ok", "degraded", "fail"] = "ok"
        return verdict

    async def invoke(self, *args: object, **kwargs: object) -> object:
        # Not exercised by the probe tests.
        raise NotImplementedError
 class _SelfcheckOCRClient:
    """Stub OCR client whose ``selfcheck`` always reports ok."""

    async def selfcheck(self) -> Literal["ok", "fail"]:
        verdict: Literal["ok", "fail"] = "ok"
        return verdict

    async def ocr(self, *args: object, **kwargs: object) -> object:
        # Not exercised by the probe tests.
        raise NotImplementedError
 class _BrokenSelfcheckOllama:
    """Stub GenAI client whose ``selfcheck`` always raises ``RuntimeError``."""

    async def selfcheck(
        self, expected_model: str
    ) -> Literal["ok", "degraded", "fail"]:
        failure = RuntimeError("boom")
        raise failure

    async def invoke(self, *args: object, **kwargs: object) -> object:
        # Not exercised by the probe tests.
        raise NotImplementedError
 class TestOllamaProbe:
    """Behaviour of the sync probe built by ``_make_ollama_probe``."""

    def test_fake_client_without_selfcheck_reports_ok(self) -> None:
        settings = _cfg(test_mode="fake", default_model="gpt-oss:20b")
        fake = FakeGenAIClient(parsed=None)
        check = _make_ollama_probe(fake, settings)
        assert check() == "ok"

    def test_real_selfcheck_returns_its_verdict(self) -> None:
        settings = _cfg(default_model="gpt-oss:20b")
        stub = _SelfcheckOllamaClient()
        check = _make_ollama_probe(stub, settings)  # type: ignore[arg-type]
        verdict = check()
        assert verdict == "ok"
        # The configured default model must be what selfcheck received.
        assert stub.called_with == "gpt-oss:20b"

    def test_selfcheck_exception_falls_back_to_fail(self) -> None:
        settings = _cfg(default_model="gpt-oss:20b")
        broken = _BrokenSelfcheckOllama()
        check = _make_ollama_probe(broken, settings)  # type: ignore[arg-type]
        assert check() == "fail"
 class TestOCRProbe:
    """Behaviour of the sync probe built by ``_make_ocr_probe``."""

    def test_fake_client_without_selfcheck_reports_ok(self) -> None:
        from ix.contracts.response import OCRDetails, OCRResult

        canned = OCRResult(result=OCRDetails())
        check = _make_ocr_probe(FakeOCRClient(canned=canned))
        assert check() == "ok"

    def test_real_selfcheck_returns_its_verdict(self) -> None:
        stub = _SelfcheckOCRClient()
        check = _make_ocr_probe(stub)  # type: ignore[arg-type]
        assert check() == "ok"
 class TestBuildPipeline:
    """``build_pipeline`` wires the five production steps in order."""

    def test_assembles_all_five_steps_in_order(self) -> None:
        from ix.contracts.response import OCRDetails, OCRResult

        expected_order = [
            SetupStep,
            OCRStep,
            GenAIStep,
            ReliabilityStep,
            ResponseHandlerStep,
        ]
        genai = FakeGenAIClient(parsed=None)
        ocr = FakeOCRClient(canned=OCRResult(result=OCRDetails()))

        pipeline = build_pipeline(genai, ocr, _cfg(test_mode="fake"))

        assert isinstance(pipeline, Pipeline)
        # Reach into the private step list to check the wiring order.
        actual_order = list(map(type, pipeline._steps))  # type: ignore[attr-defined]
        assert actual_order == expected_order
--- a/tests/unit/test_factories.py
+++ b/tests/unit/test_factories.py
@ -0,0 +1,60 @@
 """Tests for the GenAI + OCR factories (Task 4.3).
 The factories pick between fake and real clients based on
 ``IX_TEST_MODE``. CI runs with ``IX_TEST_MODE=fake``, production runs
 without — so the selection knob is the one lever between hermetic CI and
 real clients.
 """
 from __future__ import annotations
 from ix.config import AppConfig
 from ix.genai import make_genai_client
 from ix.genai.fake import FakeGenAIClient
 from ix.genai.ollama_client import OllamaClient
 from ix.ocr import make_ocr_client
 from ix.ocr.fake import FakeOCRClient
 from ix.ocr.surya_client import SuryaOCRClient
 def _cfg(**overrides: object) -> AppConfig:
    """Return an AppConfig built with ``_env_file=None`` and the given overrides.

    Passing ``_env_file=None`` keeps the repo's ``.env.example`` from
    being loaded.
    """
    settings = AppConfig(_env_file=None, **overrides)  # type: ignore[call-arg]
    return settings
 class TestGenAIFactory:
    """``make_genai_client`` selects fake vs Ollama from the config."""

    def test_fake_mode_returns_fake(self) -> None:
        built = make_genai_client(_cfg(test_mode="fake"))
        assert isinstance(built, FakeGenAIClient)

    def test_production_returns_ollama_with_configured_url(self) -> None:
        url = "http://ollama.host:11434"
        settings = _cfg(
            test_mode=None,
            ollama_url=url,
            genai_call_timeout_seconds=42,
        )
        built = make_genai_client(settings)
        assert isinstance(built, OllamaClient)
        # Private attributes are inspected to confirm the config was bound.
        assert built._base_url == url
        assert built._per_call_timeout_s == 42
 class TestOCRFactory:
    """``make_ocr_client`` selects fake vs Surya and rejects unknown engines."""

    def test_fake_mode_returns_fake(self) -> None:
        built = make_ocr_client(_cfg(test_mode="fake"))
        assert isinstance(built, FakeOCRClient)

    def test_production_surya_returns_surya(self) -> None:
        settings = _cfg(test_mode=None, ocr_engine="surya")
        built = make_ocr_client(settings)
        assert isinstance(built, SuryaOCRClient)

    def test_unknown_engine_raises(self) -> None:
        bad_settings = _cfg(test_mode=None, ocr_engine="tesseract")

        import pytest

        with pytest.raises(ValueError, match="ocr_engine"):
            make_ocr_client(bad_settings)