Task 4.3 closes the loop on Chunk 4: the FastAPI lifespan now selects fake vs. real clients via IX_TEST_MODE (a new AppConfig field), wires the /healthz probes to the live selfcheck() on OllamaClient / SuryaOCRClient, and spawns the worker with a production Pipeline factory that builds SetupStep -> OCRStep -> GenAIStep -> ReliabilityStep -> ResponseHandlerStep over the injected clients.

Factories:
- make_genai_client(cfg) -> FakeGenAIClient | OllamaClient
- make_ocr_client(cfg) -> FakeOCRClient | SuryaOCRClient (spec §6.2)

Probes run the async selfcheck on a fresh event loop in a short-lived thread, so they are safe to call from either sync callers or a live FastAPI handler without stalling the request loop.

Drops the worker-loop spawn_worker_task stub — the app module now owns the production spawn directly.

Tests: +11 unit tests (5 factories + 6 app-wiring / probe adapter / pipeline build). Full suite: 236 passed.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
232 lines
7.8 KiB
Python
232 lines
7.8 KiB
Python
"""FastAPI app factory + lifespan.
|
|
|
|
``create_app()`` wires the REST router on top of a lifespan that spawns the
|
|
worker loop (Task 3.5) and the pg_queue listener (Task 3.6). Tests that
|
|
don't care about the worker call ``create_app(spawn_worker=False)`` so the
|
|
lifespan returns cleanly.
|
|
|
|
Task 4.3 fills in the production wiring:
|
|
|
|
* Factories (``make_genai_client`` / ``make_ocr_client``) pick between
|
|
fakes (``IX_TEST_MODE=fake``) and real Ollama/Surya clients.
|
|
* ``/healthz`` probes call ``selfcheck()`` on the active clients. In
|
|
``fake`` mode they always report ok.
|
|
* The worker's :class:`Pipeline` factory is built once per spawn with the real
|
|
chain of Steps; each call to the injected ``pipeline_factory`` returns
|
|
a fresh Pipeline so per-request state stays isolated.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
from collections.abc import AsyncIterator, Callable
|
|
from contextlib import asynccontextmanager, suppress
|
|
from typing import Literal
|
|
|
|
from fastapi import FastAPI
|
|
|
|
from ix.adapters.rest.routes import Probes, get_probes
|
|
from ix.adapters.rest.routes import router as rest_router
|
|
from ix.config import AppConfig, get_config
|
|
from ix.genai import make_genai_client
|
|
from ix.genai.client import GenAIClient
|
|
from ix.ocr import make_ocr_client
|
|
from ix.ocr.client import OCRClient
|
|
from ix.pipeline.genai_step import GenAIStep
|
|
from ix.pipeline.ocr_step import OCRStep
|
|
from ix.pipeline.pipeline import Pipeline
|
|
from ix.pipeline.reliability_step import ReliabilityStep
|
|
from ix.pipeline.response_handler_step import ResponseHandlerStep
|
|
from ix.pipeline.setup_step import SetupStep
|
|
|
|
|
|
def build_pipeline(
    genai: GenAIClient, ocr: OCRClient, cfg: AppConfig
) -> Pipeline:
    """Build the production :class:`Pipeline` around the injected clients.

    Lives at module level so tests can exercise the production wiring
    directly, without having to run the worker.

    The step order is fixed: ``SetupStep`` -> ``OCRStep`` -> ``GenAIStep``
    -> ``ReliabilityStep`` -> ``ResponseHandlerStep``. Only the first three
    take arguments: ``SetupStep`` gets the temp directory and fetch limits
    from ``cfg``; ``OCRStep`` and ``GenAIStep`` get the injected clients.
    """
    from pathlib import Path

    from ix.ingestion import FetchConfig

    # Resolve the SetupStep inputs first, in the same order the nested
    # constructor call would evaluate them.
    scratch_dir = Path(cfg.tmp_dir)
    fetch_limits = FetchConfig(
        connect_timeout_s=float(cfg.file_connect_timeout_seconds),
        read_timeout_s=float(cfg.file_read_timeout_seconds),
        max_bytes=cfg.file_max_bytes,
    )

    chain = [
        SetupStep(tmp_dir=scratch_dir, fetch_config=fetch_limits),
        OCRStep(ocr_client=ocr),
        GenAIStep(genai_client=genai),
        ReliabilityStep(),
        ResponseHandlerStep(),
    ]
    return Pipeline(steps=chain)
|
|
|
|
|
|
def _make_ollama_probe(
    genai: GenAIClient, cfg: AppConfig
) -> Callable[[], Literal["ok", "degraded", "fail"]]:
    """Wrap the GenAI client's async ``selfcheck`` in a sync, zero-arg probe.

    The returned callable is what the ``/healthz`` route expects. A client
    without a ``selfcheck`` attribute is treated as a fake and always
    reports ``"ok"``. Otherwise the probe hands
    ``selfcheck(expected_model=cfg.default_model)`` to ``_run_async_sync``
    with ``"fail"`` as the fallback, so the result does not depend on
    whether the caller already holds a running event loop.
    """

    def probe() -> Literal["ok", "degraded", "fail"]:
        if hasattr(genai, "selfcheck"):

            def _start_selfcheck():  # type: ignore[no-untyped-def]
                # Attribute and config are looked up only when the
                # coroutine is actually created.
                return genai.selfcheck(expected_model=cfg.default_model)  # type: ignore[attr-defined]

            return _run_async_sync(_start_selfcheck, fallback="fail")

        # Fake client — nothing to probe.
        return "ok"

    return probe
|
|
|
|
|
|
def _make_ocr_probe(ocr: OCRClient) -> Callable[[], Literal["ok", "fail"]]:
    """Wrap the OCR client's async ``selfcheck`` in a sync, zero-arg probe.

    A client without a ``selfcheck`` attribute is treated as a fake and
    always reports ``"ok"``. Otherwise ``ocr.selfcheck()`` is handed to
    ``_run_async_sync`` with ``"fail"`` as the fallback.
    """

    def probe() -> Literal["ok", "fail"]:
        if hasattr(ocr, "selfcheck"):

            def _start_selfcheck():  # type: ignore[no-untyped-def]
                return ocr.selfcheck()  # type: ignore[attr-defined]

            return _run_async_sync(_start_selfcheck, fallback="fail")

        # Fake client — nothing to probe.
        return "ok"

    return probe
|
|
|
|
|
|
def _run_async_sync(  # type: ignore[no-untyped-def]
    make_coro, *, fallback: str, timeout: float | None = None
) -> str:
    """Run ``make_coro()`` on a private event loop in a thread; return its result.

    The awaitable is driven by ``asyncio.run`` inside a dedicated thread, so
    it never touches an event loop the caller may already be running. The
    calling thread blocks until the runner thread finishes.

    Parameters
    ----------
    make_coro:
        Zero-argument callable returning the awaitable to run. It is invoked
        inside the runner thread, so an exception it raises synchronously is
        handled like any other failure.
    fallback:
        Value returned when the awaitable raises, times out, or otherwise
        produces no result.
    timeout:
        Optional upper bound in seconds, enforced with ``asyncio.wait_for``.
        ``None`` (the default) waits indefinitely. The bound works by
        cancelling the awaitable, so it cannot interrupt code that blocks
        without yielding to the loop.

    Returns
    -------
    ``str(result)`` on success, otherwise ``fallback``. Failures are logged
    at WARNING level rather than silently discarded.
    """
    import logging
    import threading

    outcome: dict[str, object] = {}

    async def _drive() -> object:
        # Call make_coro() inside the running loop so any awaitable (not
        # only a coroutine object) is accepted.
        return await asyncio.wait_for(make_coro(), timeout)

    def _runner() -> None:
        try:
            # asyncio.run creates a fresh loop and, unlike a bare
            # new_event_loop()/close() pair, cancels leftover tasks and
            # shuts down async generators before closing it.
            outcome["value"] = asyncio.run(_drive())
        except Exception:  # any error collapses to fallback
            logging.getLogger(__name__).warning(
                "async probe failed; reporting %r", fallback, exc_info=True
            )

    # Daemon thread: a selfcheck stuck in blocking code must not keep the
    # interpreter alive at shutdown.
    runner = threading.Thread(target=_runner, name="ix-async-probe", daemon=True)
    runner.start()
    runner.join()

    if "value" not in outcome:
        return fallback
    return str(outcome["value"])
|
|
|
|
|
|
def create_app(*, spawn_worker: bool = True) -> FastAPI:
    """Construct the ASGI app.

    The app's lifespan builds the GenAI and OCR clients once, installs a
    default ``get_probes`` dependency override backed by those clients, and
    (optionally) starts the pg_queue listener and the background worker. On
    shutdown the worker task is cancelled and awaited, then the listener is
    stopped.

    Parameters
    ----------
    spawn_worker:
        When True (default), the lifespan spawns the background worker task
        and the pg_queue listener. Integration tests that only exercise the
        REST adapter pass False so jobs pile up as ``pending`` and the tests
        can assert on their state without a racing worker mutating them.

    Returns
    -------
    The configured :class:`FastAPI` instance with the REST router included.
    """
    import logging

    logger = logging.getLogger(__name__)

    @asynccontextmanager
    async def lifespan(_app: FastAPI) -> AsyncIterator[None]:
        cfg = get_config()

        # Build the clients once per process. The worker's pipeline
        # factory closes over these so every job runs through the same
        # Ollama/Surya instance (Surya's predictors are heavy; re-loading
        # them per job would be catastrophic).
        genai_client = make_genai_client(cfg)
        ocr_client = make_ocr_client(cfg)

        # Override the route-level probe DI so /healthz reflects the
        # actual clients. ``setdefault`` keeps any override a test has
        # already installed for ``get_probes``.
        _app.dependency_overrides.setdefault(
            get_probes,
            lambda: Probes(
                ollama=_make_ollama_probe(genai_client, cfg),
                ocr=_make_ocr_probe(ocr_client),
            ),
        )

        worker_task = None
        listener = None
        if spawn_worker:
            # Both start-up steps stay best-effort (the app still serves
            # requests if they fail), but failures are logged, not hidden.
            try:
                from ix.adapters.pg_queue.listener import (
                    PgQueueListener,
                    asyncpg_dsn_from_sqlalchemy_url,
                )

                listener = PgQueueListener(
                    dsn=asyncpg_dsn_from_sqlalchemy_url(cfg.postgres_url)
                )
                await listener.start()
            except Exception:
                logger.exception(
                    "pg_queue listener failed to start; "
                    "spawning the worker without it"
                )
                listener = None

            try:
                worker_task = await _spawn_production_worker(
                    cfg, genai_client, ocr_client, listener
                )
            except Exception:
                logger.exception(
                    "background worker failed to start; "
                    "this process will not run jobs"
                )
                worker_task = None

        try:
            yield
        finally:
            try:
                if worker_task is not None:
                    worker_task.cancel()
                    # Awaiting a cancelled task raises CancelledError, which
                    # is a BaseException (not an Exception) since Python 3.8,
                    # so it has to be suppressed explicitly.
                    with suppress(asyncio.CancelledError, Exception):
                        await worker_task
            finally:
                # Always stop the listener, even if awaiting the worker
                # raised something that was not suppressed above.
                if listener is not None:
                    with suppress(Exception):
                        await listener.stop()

    app = FastAPI(lifespan=lifespan, title="infoxtractor", version="0.1.0")
    app.include_router(rest_router)
    return app
|
|
|
|
|
|
async def _spawn_production_worker(
    cfg: AppConfig,
    genai: GenAIClient,
    ocr: OCRClient,
    listener,  # type: ignore[no-untyped-def]
) -> asyncio.Task[None]:
    """Start the background worker as an asyncio task and return that task.

    The worker receives a pipeline factory that calls ``build_pipeline`` with
    the shared clients on every invocation, so each call yields a new
    :class:`Pipeline` over the same client instances. When ``listener`` is
    not ``None`` its ``wait_for_work`` is handed to the worker; otherwise
    the worker gets ``None`` for that hook.

    The stop event passed to ``worker.run`` is created here and not exposed,
    so the caller's handle on the worker is the returned task.
    """
    from ix.store.engine import get_session_factory
    from ix.worker.loop import Worker

    # Resolve the constructor inputs one by one, in the order the original
    # keyword arguments were evaluated.
    session_factory = get_session_factory()
    max_running = 2 * cfg.pipeline_request_timeout_seconds
    callback_timeout = cfg.callback_timeout_seconds
    wait_hook = None if listener is None else listener.wait_for_work

    worker = Worker(
        session_factory=session_factory,
        pipeline_factory=lambda: build_pipeline(genai, ocr, cfg),
        poll_interval_seconds=10.0,
        max_running_seconds=max_running,
        callback_timeout_seconds=callback_timeout,
        wait_for_work=wait_hook,
    )

    return asyncio.create_task(worker.run(asyncio.Event()))
|