Merge pull request 'feat(ui): add browser UI at /ui' (#45) from feat/ui into main
All checks were successful
tests / test (push) Successful in 3m29s
All checks were successful
tests / test (push) Successful in 3m29s
This commit is contained in:
commit
136e31c82c
11 changed files with 934 additions and 3 deletions
|
|
@ -4,9 +4,9 @@ Async, on-prem, LLM-powered structured information extraction microservice. Give
|
||||||
|
|
||||||
Designed to be used by other on-prem services (e.g. mammon) as a reliable fallback / second opinion for format-specific deterministic parsers.
|
Designed to be used by other on-prem services (e.g. mammon) as a reliable fallback / second opinion for format-specific deterministic parsers.
|
||||||
|
|
||||||
Status: MVP deployed (2026-04-18) at `http://192.168.68.42:8994` — LAN only. Full reference spec at `docs/spec-core-pipeline.md`; MVP spec at `docs/superpowers/specs/2026-04-18-ix-mvp-design.md`; deploy runbook at `docs/deployment.md`.
|
Status: MVP deployed (2026-04-18) at `http://192.168.68.42:8994` — LAN only. Browser UI at `http://192.168.68.42:8994/ui`. Full reference spec at `docs/spec-core-pipeline.md`; MVP spec at `docs/superpowers/specs/2026-04-18-ix-mvp-design.md`; deploy runbook at `docs/deployment.md`.
|
||||||
|
|
||||||
Use cases: the built-in registry lives in `src/ix/use_cases/__init__.py` (`bank_statement_header` for MVP). Callers without a registered entry can ship an ad-hoc schema inline via `RequestIX.use_case_inline` (see README "Ad-hoc use cases"); the pipeline builds the Pydantic classes on the fly per request.
|
Use cases: the built-in registry lives in `src/ix/use_cases/__init__.py` (`bank_statement_header` for MVP). Callers without a registered entry can ship an ad-hoc schema inline via `RequestIX.use_case_inline` (see README "Ad-hoc use cases"); the pipeline builds the Pydantic classes on the fly per request. The `/ui` page exposes this as a "custom" option so non-engineering users can experiment without a deploy.
|
||||||
|
|
||||||
## Guiding Principles
|
## Guiding Principles
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,11 @@ Async, on-prem, LLM-powered structured information extraction microservice.
|
||||||
|
|
||||||
Given a document (PDF, image, text) and a named *use case*, ix returns a structured JSON result whose shape matches the use-case schema — together with per-field provenance (OCR segment IDs, bounding boxes, cross-OCR agreement flags) that let the caller decide how much to trust each extracted value.
|
Given a document (PDF, image, text) and a named *use case*, ix returns a structured JSON result whose shape matches the use-case schema — together with per-field provenance (OCR segment IDs, bounding boxes, cross-OCR agreement flags) that let the caller decide how much to trust each extracted value.
|
||||||
|
|
||||||
**Status:** MVP deployed. Live on the home LAN at `http://192.168.68.42:8994`.
|
**Status:** MVP deployed. Live on the home LAN at `http://192.168.68.42:8994` (REST API + browser UI at `/ui`).
|
||||||
|
|
||||||
|
## Web UI
|
||||||
|
|
||||||
|
A minimal browser UI lives at [`http://192.168.68.42:8994/ui`](http://192.168.68.42:8994/ui): drop a PDF, pick a registered use case or define one inline, submit, see the pretty-printed result. HTMX polls the job status every 2 s until the pipeline finishes. LAN-only, no auth.
|
||||||
|
|
||||||
- Full reference spec: [`docs/spec-core-pipeline.md`](docs/spec-core-pipeline.md) (aspirational; MVP is a strict subset)
|
- Full reference spec: [`docs/spec-core-pipeline.md`](docs/spec-core-pipeline.md) (aspirational; MVP is a strict subset)
|
||||||
- **MVP design:** [`docs/superpowers/specs/2026-04-18-ix-mvp-design.md`](docs/superpowers/specs/2026-04-18-ix-mvp-design.md)
|
- **MVP design:** [`docs/superpowers/specs/2026-04-18-ix-mvp-design.md`](docs/superpowers/specs/2026-04-18-ix-mvp-design.md)
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,14 @@ dependencies = [
|
||||||
"pillow>=10.2,<11.0",
|
"pillow>=10.2,<11.0",
|
||||||
"python-magic>=0.4.27",
|
"python-magic>=0.4.27",
|
||||||
"python-dateutil>=2.9",
|
"python-dateutil>=2.9",
|
||||||
|
|
||||||
|
# UI (HTMX + Jinja2 templates served from /ui). Both arrive as transitive
|
||||||
|
# deps via FastAPI/Starlette already, but we pin explicitly so the import
|
||||||
|
# surface is owned by us. python-multipart backs FastAPI's `Form()` /
|
||||||
|
# `UploadFile` parsing — required by `/ui/jobs` submissions.
|
||||||
|
"jinja2>=3.1",
|
||||||
|
"aiofiles>=24.1",
|
||||||
|
"python-multipart>=0.0.12",
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@ from contextlib import asynccontextmanager, suppress
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
from fastapi import FastAPI
|
from fastapi import FastAPI
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
|
||||||
from ix.adapters.rest.routes import Probes, get_probes
|
from ix.adapters.rest.routes import Probes, get_probes
|
||||||
from ix.adapters.rest.routes import router as rest_router
|
from ix.adapters.rest.routes import router as rest_router
|
||||||
|
|
@ -38,6 +39,8 @@ from ix.pipeline.pipeline import Pipeline
|
||||||
from ix.pipeline.reliability_step import ReliabilityStep
|
from ix.pipeline.reliability_step import ReliabilityStep
|
||||||
from ix.pipeline.response_handler_step import ResponseHandlerStep
|
from ix.pipeline.response_handler_step import ResponseHandlerStep
|
||||||
from ix.pipeline.setup_step import SetupStep
|
from ix.pipeline.setup_step import SetupStep
|
||||||
|
from ix.ui import build_router as build_ui_router
|
||||||
|
from ix.ui.routes import STATIC_DIR as UI_STATIC_DIR
|
||||||
|
|
||||||
|
|
||||||
def build_pipeline(
|
def build_pipeline(
|
||||||
|
|
@ -202,6 +205,16 @@ def create_app(*, spawn_worker: bool = True) -> FastAPI:
|
||||||
|
|
||||||
app = FastAPI(lifespan=lifespan, title="infoxtractor", version="0.1.0")
|
app = FastAPI(lifespan=lifespan, title="infoxtractor", version="0.1.0")
|
||||||
app.include_router(rest_router)
|
app.include_router(rest_router)
|
||||||
|
# Browser UI — additive, never touches the REST paths above.
|
||||||
|
app.include_router(build_ui_router())
|
||||||
|
# Static assets for the UI. CDN-only for MVP so the directory is
|
||||||
|
# essentially empty, but the mount must exist so relative asset
|
||||||
|
# URLs resolve cleanly.
|
||||||
|
app.mount(
|
||||||
|
"/ui/static",
|
||||||
|
StaticFiles(directory=str(UI_STATIC_DIR)),
|
||||||
|
name="ui-static",
|
||||||
|
)
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
13
src/ix/ui/__init__.py
Normal file
13
src/ix/ui/__init__.py
Normal file
|
|
@ -0,0 +1,13 @@
|
||||||
|
"""Minimal browser UI served alongside the REST API at ``/ui``.
|
||||||
|
|
||||||
|
The module is intentionally thin: templates + HTMX + Pico CSS (all from
|
||||||
|
CDNs, no build step). Uploads land in ``{cfg.tmp_dir}/ui/<uuid>.pdf`` and
|
||||||
|
are submitted through the same :func:`ix.store.jobs_repo.insert_pending`
|
||||||
|
entry point the REST adapter uses — the UI does not duplicate that logic.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from ix.ui.routes import build_router
|
||||||
|
|
||||||
|
__all__ = ["build_router"]
|
||||||
307
src/ix/ui/routes.py
Normal file
307
src/ix/ui/routes.py
Normal file
|
|
@ -0,0 +1,307 @@
|
||||||
|
"""``/ui`` router — thin HTML wrapper over the existing jobs pipeline.
|
||||||
|
|
||||||
|
Design notes:
|
||||||
|
|
||||||
|
* Uploads stream to ``{cfg.tmp_dir}/ui/{uuid4()}.pdf`` via aiofiles; the
|
||||||
|
file persists for the lifetime of the ``ix_id`` (no cleanup cron — spec
|
||||||
|
deferred).
|
||||||
|
* The submission handler builds a :class:`RequestIX` (inline use case
|
||||||
|
supported) and inserts it via the same
|
||||||
|
:func:`ix.store.jobs_repo.insert_pending` the REST adapter uses.
|
||||||
|
* Responses are HTML. For HTMX-triggered submissions the handler returns
|
||||||
|
``HX-Redirect`` so the whole page swaps; for plain form posts it returns
|
||||||
|
a 303 redirect.
|
||||||
|
* The fragment endpoint powers the polling loop: while the job is
|
||||||
|
pending/running, the fragment auto-refreshes every 2s via
|
||||||
|
``hx-trigger="every 2s"``; when terminal, the trigger is dropped and the
|
||||||
|
pretty-printed response is rendered with highlight.js.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Annotated
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
import aiofiles
|
||||||
|
from fastapi import (
|
||||||
|
APIRouter,
|
||||||
|
Depends,
|
||||||
|
File,
|
||||||
|
Form,
|
||||||
|
HTTPException,
|
||||||
|
Request,
|
||||||
|
UploadFile,
|
||||||
|
)
|
||||||
|
from fastapi.responses import HTMLResponse, RedirectResponse, Response
|
||||||
|
from fastapi.templating import Jinja2Templates
|
||||||
|
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
|
||||||
|
|
||||||
|
from ix.adapters.rest.routes import get_session_factory_dep
|
||||||
|
from ix.config import AppConfig, get_config
|
||||||
|
from ix.contracts.request import (
|
||||||
|
Context,
|
||||||
|
FileRef,
|
||||||
|
GenAIOptions,
|
||||||
|
InlineUseCase,
|
||||||
|
OCROptions,
|
||||||
|
Options,
|
||||||
|
ProvenanceOptions,
|
||||||
|
RequestIX,
|
||||||
|
UseCaseFieldDef,
|
||||||
|
)
|
||||||
|
from ix.store import jobs_repo
|
||||||
|
from ix.use_cases import REGISTRY
|
||||||
|
|
||||||
|
TEMPLATES_DIR = Path(__file__).parent / "templates"
|
||||||
|
STATIC_DIR = Path(__file__).parent / "static"
|
||||||
|
|
||||||
|
|
||||||
|
def _templates() -> Jinja2Templates:
|
||||||
|
"""Build a new ``Jinja2Templates`` rooted at ``TEMPLATES_DIR`` on every call; nothing is cached."""
|
||||||
|
|
||||||
|
return Jinja2Templates(directory=str(TEMPLATES_DIR))
|
||||||
|
|
||||||
|
|
||||||
|
def _ui_tmp_dir(cfg: AppConfig) -> Path:
    """Return ``<cfg.tmp_dir>/ui``, creating it (and any parents) if missing.

    Nothing in this function removes the directory or the files stored in it.
    """
    uploads_root = Path(cfg.tmp_dir).joinpath("ui")
    uploads_root.mkdir(parents=True, exist_ok=True)
    return uploads_root
|
||||||
|
|
||||||
|
|
||||||
|
def _index_context(
    *,
    job: object | None = None,
    form_error: str | None = None,
    form_values: dict[str, str] | None = None,
) -> dict[str, object]:
    """Assemble the template context shared by every ``index.html`` render."""
    return {
        "registered_use_cases": sorted(REGISTRY.keys()),
        "job": job,
        "form_error": form_error,
        "form_values": form_values if form_values is not None else {},
    }


def _discard_upload(path: Path) -> None:
    """Delete an upload that no job will reference; a missing file is fine."""
    path.unlink(missing_ok=True)


async def _store_upload(pdf: UploadFile, target: Path, limit: int) -> bool:
    """Stream ``pdf`` into ``target`` in 64 KiB chunks.

    Returns ``True`` when the whole upload was written and ``False`` when it
    grew past ``limit`` bytes. In the ``False`` case, and when reading or
    writing raises, the partial file is removed before returning/re-raising.
    """
    written = 0
    within_limit = True
    try:
        async with aiofiles.open(target, "wb") as out:
            while chunk := await pdf.read(64 * 1024):
                written += len(chunk)
                if written > limit:
                    within_limit = False
                    break
                await out.write(chunk)
    except BaseException:
        _discard_upload(target)
        raise
    if not within_limit:
        # Unlink only after the handle has been closed.
        _discard_upload(target)
    return within_limit


def build_router() -> APIRouter:
    """Return a fresh ``/ui`` router.

    Routes registered on the router:

    * ``GET /ui`` and ``GET /ui/`` — the empty submission form.
    * ``GET /ui/jobs/{job_id}`` — the form plus a panel for that job
      (404 when the job does not exist).
    * ``GET /ui/jobs/{job_id}/fragment`` — the status/result fragment
      (404 when the job does not exist).
    * ``POST /ui/jobs`` — store the uploaded PDF, build a ``RequestIX`` and
      insert it via ``jobs_repo.insert_pending``.
    """
    router = APIRouter(prefix="/ui", tags=["ui"])

    @router.get("", response_class=HTMLResponse)
    @router.get("/", response_class=HTMLResponse)
    async def index(request: Request) -> Response:
        """Render the submission form with no job attached."""
        return _templates().TemplateResponse(
            request, "index.html", _index_context()
        )

    @router.get("/jobs/{job_id}", response_class=HTMLResponse)
    async def job_page(
        request: Request,
        job_id: UUID,
        session_factory: Annotated[
            async_sessionmaker[AsyncSession], Depends(get_session_factory_dep)
        ],
    ) -> Response:
        """Render the form together with the panel for ``job_id``."""
        async with session_factory() as session:
            job = await jobs_repo.get(session, job_id)
        if job is None:
            raise HTTPException(status_code=404, detail="job not found")
        return _templates().TemplateResponse(
            request, "index.html", _index_context(job=job)
        )

    @router.get("/jobs/{job_id}/fragment", response_class=HTMLResponse)
    async def job_fragment(
        request: Request,
        job_id: UUID,
        session_factory: Annotated[
            async_sessionmaker[AsyncSession], Depends(get_session_factory_dep)
        ],
    ) -> Response:
        """Render ``job_fragment.html`` for ``job_id``.

        When the job has a response it is dumped as indented, key-sorted JSON
        and handed to the template as ``response_json``; otherwise that
        context value is ``None``.
        """
        async with session_factory() as session:
            job = await jobs_repo.get(session, job_id)
        if job is None:
            raise HTTPException(status_code=404, detail="job not found")

        response_json: str | None = None
        if job.response is not None:
            response_json = json.dumps(
                job.response.model_dump(mode="json"),
                indent=2,
                sort_keys=True,
                default=str,
            )
        return _templates().TemplateResponse(
            request,
            "job_fragment.html",
            {"job": job, "response_json": response_json},
        )

    @router.post("/jobs")
    async def submit_job(
        request: Request,
        session_factory: Annotated[
            async_sessionmaker[AsyncSession], Depends(get_session_factory_dep)
        ],
        pdf: Annotated[UploadFile, File()],
        use_case_name: Annotated[str, Form()],
        use_case_mode: Annotated[str, Form()] = "registered",
        texts: Annotated[str, Form()] = "",
        ix_client_id: Annotated[str, Form()] = "ui",
        request_id: Annotated[str, Form()] = "",
        system_prompt: Annotated[str, Form()] = "",
        default_model: Annotated[str, Form()] = "",
        fields_json: Annotated[str, Form()] = "",
        use_ocr: Annotated[str, Form()] = "",
        ocr_only: Annotated[str, Form()] = "",
        include_ocr_text: Annotated[str, Form()] = "",
        include_geometries: Annotated[str, Form()] = "",
        gen_ai_model_name: Annotated[str, Form()] = "",
        include_provenance: Annotated[str, Form()] = "",
        max_sources_per_field: Annotated[str, Form()] = "10",
    ) -> Response:
        """Turn the multipart form into a pending job.

        On success the response is an ``HX-Redirect`` header (when the request
        carries ``HX-Request: true``) or a 303 redirect to the job page.
        Validation problems re-render ``index.html`` with the submitted values
        and an error message: 422 for a bad inline use case or request, 413
        when the upload exceeds ``cfg.file_max_bytes``.
        """
        cfg = get_config()
        # Echoed back into the form when validation fails.
        form_values = {
            "use_case_mode": use_case_mode,
            "use_case_name": use_case_name,
            "ix_client_id": ix_client_id,
            "request_id": request_id,
            "texts": texts,
            "system_prompt": system_prompt,
            "default_model": default_model,
            "fields_json": fields_json,
            "use_ocr": use_ocr,
            "ocr_only": ocr_only,
            "include_ocr_text": include_ocr_text,
            "include_geometries": include_geometries,
            "gen_ai_model_name": gen_ai_model_name,
            "include_provenance": include_provenance,
            "max_sources_per_field": max_sources_per_field,
        }

        def _rerender(error: str, status: int = 200) -> Response:
            # NOTE(review): htmx does not swap 4xx responses by default, so an
            # HTMX-submitted form may never show this page — confirm against
            # the client-side configuration.
            return _templates().TemplateResponse(
                request,
                "index.html",
                _index_context(form_error=error, form_values=form_values),
                status_code=status,
            )

        # --- Inline use case (optional) ---
        inline: InlineUseCase | None = None
        if use_case_mode == "custom":
            try:
                raw_fields = json.loads(fields_json)
            except json.JSONDecodeError as exc:
                return _rerender(f"Invalid fields JSON: {exc}", status=422)
            if not isinstance(raw_fields, list):
                return _rerender(
                    "Invalid fields JSON: must be a list of field objects",
                    status=422,
                )
            try:
                parsed = [UseCaseFieldDef.model_validate(f) for f in raw_fields]
                inline = InlineUseCase(
                    use_case_name=use_case_name,
                    system_prompt=system_prompt,
                    default_model=default_model or None,
                    fields=parsed,
                )
            except Exception as exc:  # pydantic ValidationError or similar
                return _rerender(
                    f"Invalid inline use-case definition: {exc}",
                    status=422,
                )

        # --- PDF upload (size cap matching IX_FILE_MAX_BYTES) ---
        target = _ui_tmp_dir(cfg) / f"{uuid.uuid4().hex}.pdf"
        limit = cfg.file_max_bytes
        if not await _store_upload(pdf, target, limit):
            return _rerender(
                f"PDF exceeds IX_FILE_MAX_BYTES ({limit} bytes)",
                status=413,
            )

        # --- Build RequestIX ---
        stripped_texts = texts.strip()
        ctx_texts: list[str] = [stripped_texts] if stripped_texts else []
        req_id = request_id.strip() or uuid.uuid4().hex
        try:
            request_ix = RequestIX(
                use_case=use_case_name or "adhoc",
                use_case_inline=inline,
                ix_client_id=(ix_client_id.strip() or "ui"),
                request_id=req_id,
                context=Context(
                    files=[FileRef(url=f"file://{target.resolve()}")],
                    texts=ctx_texts,
                ),
                options=Options(
                    ocr=OCROptions(
                        use_ocr=_flag(use_ocr, default=True),
                        ocr_only=_flag(ocr_only, default=False),
                        include_ocr_text=_flag(include_ocr_text, default=False),
                        include_geometries=_flag(include_geometries, default=False),
                    ),
                    gen_ai=GenAIOptions(
                        gen_ai_model_name=(gen_ai_model_name.strip() or None),
                    ),
                    provenance=ProvenanceOptions(
                        include_provenance=_flag(include_provenance, default=True),
                        max_sources_per_field=int(max_sources_per_field or 10),
                    ),
                ),
            )
        except Exception as exc:
            # No job will ever point at this file, so do not leave it behind.
            _discard_upload(target)
            return _rerender(f"Invalid request: {exc}", status=422)

        async with session_factory() as session:
            job = await jobs_repo.insert_pending(
                session, request_ix, callback_url=None
            )
            await session.commit()

        redirect_to = f"/ui/jobs/{job.job_id}"
        if request.headers.get("HX-Request", "").lower() == "true":
            return Response(status_code=200, headers={"HX-Redirect": redirect_to})
        return RedirectResponse(url=redirect_to, status_code=303)

    return router
|
||||||
|
|
||||||
|
|
||||||
|
def _flag(value: str, *, default: bool) -> bool:
    """Interpret a checkbox-style form value as a boolean.

    HTML forms omit unchecked checkboxes, so an empty (or whitespace-only)
    value is treated as "not submitted" and yields ``default``. Otherwise the
    value is true when, ignoring case and surrounding whitespace, it is one
    of ``on``, ``true``, ``1`` or ``yes``; any other text is false.

    Args:
        value: Raw form value; ``""`` when the field was not submitted.
        default: Result to return when ``value`` is blank.

    Returns:
        The parsed boolean.
    """
    normalized = value.strip().lower()
    if not normalized:
        return default
    return normalized in ("on", "true", "1", "yes")
|
||||||
0
src/ix/ui/static/.gitkeep
Normal file
0
src/ix/ui/static/.gitkeep
Normal file
188
src/ix/ui/templates/index.html
Normal file
188
src/ix/ui/templates/index.html
Normal file
|
|
@ -0,0 +1,188 @@
|
||||||
|
<!doctype html>
|
||||||
|
<html lang="en" data-theme="light">
|
||||||
|
<head>
|
||||||
|
<meta charset="utf-8" />
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1" />
|
||||||
|
<title>infoxtractor UI</title>
|
||||||
|
<link
|
||||||
|
rel="stylesheet"
|
||||||
|
href="https://cdn.jsdelivr.net/npm/@picocss/pico@2/css/pico.min.css"
|
||||||
|
/>
|
||||||
|
<link
|
||||||
|
rel="stylesheet"
|
||||||
|
href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/styles/atom-one-light.min.css"
|
||||||
|
/>
|
||||||
|
<script src="https://unpkg.com/htmx.org@1.9.12"></script>
|
||||||
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.10.0/highlight.min.js"></script>
|
||||||
|
<style>
|
||||||
|
main { padding-top: 2rem; padding-bottom: 4rem; }
|
||||||
|
pre code.hljs { padding: 1rem; border-radius: 0.4rem; }
|
||||||
|
.form-error { color: var(--pico-del-color, #c44); font-weight: 600; }
|
||||||
|
details[open] > summary { margin-bottom: 0.5rem; }
|
||||||
|
.field-hint { font-size: 0.85rem; color: var(--pico-muted-color); }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<main class="container">
|
||||||
|
<hgroup>
|
||||||
|
<h1>infoxtractor</h1>
|
||||||
|
<p>Drop a PDF, pick or define a use case, run the pipeline.</p>
|
||||||
|
</hgroup>
|
||||||
|
|
||||||
|
{% if form_error %}
|
||||||
|
<article class="form-error">
|
||||||
|
<p><strong>Form error:</strong> {{ form_error }}</p>
|
||||||
|
</article>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<article>
|
||||||
|
<form
|
||||||
|
action="/ui/jobs"
|
||||||
|
method="post"
|
||||||
|
enctype="multipart/form-data"
|
||||||
|
hx-post="/ui/jobs"
|
||||||
|
hx-encoding="multipart/form-data"
|
||||||
|
>
|
||||||
|
<label>
|
||||||
|
PDF file
|
||||||
|
<input type="file" name="pdf" accept="application/pdf" required />
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label>
|
||||||
|
Extra texts (optional, e.g. Paperless OCR output)
|
||||||
|
<textarea
|
||||||
|
name="texts"
|
||||||
|
rows="3"
|
||||||
|
placeholder="Plain text passed as context.texts[0]"
|
||||||
|
>{{ form_values.get("texts", "") }}</textarea>
|
||||||
|
<small class="field-hint">Whatever you type is submitted as a single entry in <code>context.texts</code>.</small>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<fieldset>
|
||||||
|
<legend>Use case</legend>
|
||||||
|
<label>
|
||||||
|
<input
|
||||||
|
type="radio"
|
||||||
|
name="use_case_mode"
|
||||||
|
value="registered"
|
||||||
|
{% if form_values.get("use_case_mode", "registered") == "registered" %}checked{% endif %}
|
||||||
|
onchange="document.getElementById('custom-fields').hidden = true"
|
||||||
|
/>
|
||||||
|
Registered
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
<input
|
||||||
|
type="radio"
|
||||||
|
name="use_case_mode"
|
||||||
|
value="custom"
|
||||||
|
{% if form_values.get("use_case_mode") == "custom" %}checked{% endif %}
|
||||||
|
onchange="document.getElementById('custom-fields').hidden = false"
|
||||||
|
/>
|
||||||
|
Custom (inline)
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<label>
|
||||||
|
Use case name
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
name="use_case_name"
|
||||||
|
list="registered-use-cases"
|
||||||
|
value="{{ form_values.get('use_case_name', 'bank_statement_header') }}"
|
||||||
|
required
|
||||||
|
/>
|
||||||
|
<datalist id="registered-use-cases">
|
||||||
|
{% for name in registered_use_cases %}
|
||||||
|
<option value="{{ name }}"></option>
|
||||||
|
{% endfor %}
|
||||||
|
</datalist>
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<div id="custom-fields" {% if form_values.get("use_case_mode") != "custom" %}hidden{% endif %}>
|
||||||
|
<label>
|
||||||
|
System prompt
|
||||||
|
<textarea name="system_prompt" rows="3">{{ form_values.get("system_prompt", "") }}</textarea>
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
Default model (optional)
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
name="default_model"
|
||||||
|
value="{{ form_values.get('default_model', '') }}"
|
||||||
|
placeholder="qwen3:14b"
|
||||||
|
/>
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
Fields (JSON list of {name, type, required?, choices?, description?})
|
||||||
|
<textarea name="fields_json" rows="6" placeholder='[{"name": "vendor", "type": "str", "required": true}]'>{{ form_values.get("fields_json", "") }}</textarea>
|
||||||
|
<small class="field-hint">Types: str, int, float, decimal, date, datetime, bool. <code>choices</code> works on <code>str</code> only.</small>
|
||||||
|
</label>
|
||||||
|
</div>
|
||||||
|
</fieldset>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>Advanced options</summary>
|
||||||
|
<label>
|
||||||
|
Client id
|
||||||
|
<input type="text" name="ix_client_id" value="{{ form_values.get('ix_client_id', 'ui') }}" />
|
||||||
|
</label>
|
||||||
|
<label>
|
||||||
|
Request id (blank → random)
|
||||||
|
<input type="text" name="request_id" value="{{ form_values.get('request_id', '') }}" />
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<fieldset>
  <legend>OCR</legend>
  {#- Browsers omit unchecked checkboxes and the handler treats a missing
      use_ocr as enabled. The hidden "off" value is submitted first; a ticked
      checkbox sends "on" after it and is expected to take precedence
      (single-valued form field), so unticking really disables OCR. -#}
  <input type="hidden" name="use_ocr" value="off" />
  <label><input type="checkbox" name="use_ocr" value="on" {% if (form_values.get("use_ocr") or "on")|lower in ("on", "true", "1", "yes") %}checked{% endif %} /> use_ocr</label>
  <label><input type="checkbox" name="ocr_only" {% if form_values.get("ocr_only") %}checked{% endif %} /> ocr_only</label>
  <label><input type="checkbox" name="include_ocr_text" {% if form_values.get("include_ocr_text") %}checked{% endif %} /> include_ocr_text</label>
  <label><input type="checkbox" name="include_geometries" {% if form_values.get("include_geometries") %}checked{% endif %} /> include_geometries</label>
</fieldset>
|
||||||
|
|
||||||
|
<label>
|
||||||
|
GenAI model override (optional)
|
||||||
|
<input type="text" name="gen_ai_model_name" value="{{ form_values.get('gen_ai_model_name', '') }}" />
|
||||||
|
</label>
|
||||||
|
|
||||||
|
<fieldset>
  <legend>Provenance</legend>
  {#- Browsers omit unchecked checkboxes and the handler treats a missing
      include_provenance as enabled. The hidden "off" value is submitted
      first; a ticked checkbox sends "on" after it and is expected to take
      precedence (single-valued form field), so unticking really disables
      provenance. -#}
  <input type="hidden" name="include_provenance" value="off" />
  <label><input type="checkbox" name="include_provenance" value="on" {% if (form_values.get("include_provenance") or "on")|lower in ("on", "true", "1", "yes") %}checked{% endif %} /> include_provenance</label>
  <label>
    max_sources_per_field
    <input type="number" name="max_sources_per_field" min="1" max="100" value="{{ form_values.get('max_sources_per_field', '10') }}" />
  </label>
</fieldset>
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<button type="submit">Submit</button>
|
||||||
|
</form>
|
||||||
|
</article>
|
||||||
|
|
||||||
|
{% if job %}
|
||||||
|
<article id="job-panel">
|
||||||
|
<header>
|
||||||
|
<strong>Job</strong> <code>{{ job.job_id }}</code>
|
||||||
|
<br /><small>ix_id: <code>{{ job.ix_id }}</code></small>
|
||||||
|
</header>
|
||||||
|
<div
|
||||||
|
id="job-status"
|
||||||
|
hx-get="/ui/jobs/{{ job.job_id }}/fragment"
|
||||||
|
hx-trigger="load"
|
||||||
|
hx-swap="innerHTML"
|
||||||
|
>
|
||||||
|
Loading…
|
||||||
|
</div>
|
||||||
|
</article>
|
||||||
|
{% endif %}
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
document.body.addEventListener("htmx:afterSettle", () => {
|
||||||
|
if (window.hljs) {
|
||||||
|
document.querySelectorAll("pre code").forEach((el) => {
|
||||||
|
try { hljs.highlightElement(el); } catch (_) { /* noop */ }
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
24
src/ix/ui/templates/job_fragment.html
Normal file
24
src/ix/ui/templates/job_fragment.html
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
{#- HTMX fragment rendered into #job-status on the results panel.
|
||||||
|
Pending/running → keep polling every 2s; terminal → render JSON. -#}
|
||||||
|
{% set terminal = job.status in ("done", "error") %}
|
||||||
|
<div
|
||||||
|
id="job-fragment"
|
||||||
|
{% if not terminal %}
|
||||||
|
hx-get="/ui/jobs/{{ job.job_id }}/fragment"
|
||||||
|
hx-trigger="every 2s"
|
||||||
|
hx-swap="outerHTML"
|
||||||
|
{% endif %}
|
||||||
|
>
|
||||||
|
<p>
|
||||||
|
Status: <strong>{{ job.status }}</strong>
|
||||||
|
{% if not terminal %}
|
||||||
|
<progress></progress>
|
||||||
|
{% endif %}
|
||||||
|
</p>
|
||||||
|
|
||||||
|
{% if terminal and response_json %}
|
||||||
|
<pre><code class="language-json">{{ response_json }}</code></pre>
|
||||||
|
{% elif terminal %}
|
||||||
|
<p><em>No response body.</em></p>
|
||||||
|
{% endif %}
|
||||||
|
</div>
|
||||||
350
tests/integration/test_ui_routes.py
Normal file
350
tests/integration/test_ui_routes.py
Normal file
|
|
@ -0,0 +1,350 @@
|
||||||
|
"""Integration tests for the `/ui` router (spec §PR 2).
|
||||||
|
|
||||||
|
Covers the full round-trip through `POST /ui/jobs` — the handler parses
|
||||||
|
multipart form data into a `RequestIX` and hands it to
|
||||||
|
`ix.store.jobs_repo.insert_pending`, the same entry point the REST adapter
|
||||||
|
uses. Tests assert the job row exists with the right client/request ids and
|
||||||
|
that custom-use-case forms produce a `use_case_inline` block in the stored
|
||||||
|
request JSON.
|
||||||
|
|
||||||
|
The DB-touching tests depend on the shared integration conftest which
|
||||||
|
spins up migrations against the configured Postgres; the pure-template
|
||||||
|
tests (`GET /ui` and the fragment renderer) still need a factory but
|
||||||
|
won't actually query — they're cheap.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from collections.abc import Iterator
|
||||||
|
from pathlib import Path
|
||||||
|
from uuid import uuid4
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
from sqlalchemy import select
|
||||||
|
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
|
||||||
|
|
||||||
|
from ix.adapters.rest.routes import Probes, get_probes, get_session_factory_dep
|
||||||
|
from ix.app import create_app
|
||||||
|
from ix.store.models import IxJob
|
||||||
|
|
||||||
|
FIXTURE_DIR = Path(__file__).resolve().parents[1] / "fixtures"
|
||||||
|
FIXTURE_PDF = FIXTURE_DIR / "synthetic_giro.pdf"
|
||||||
|
|
||||||
|
|
||||||
|
def _factory_for_url(postgres_url: str):  # type: ignore[no-untyped-def]
    """Return a zero-argument callable that yields a session factory.

    Every call of the returned callable creates a new async engine for
    ``postgres_url`` and wraps it in an ``async_sessionmaker``.
    """
    return lambda: async_sessionmaker(
        create_async_engine(postgres_url, pool_pre_ping=True),
        expire_on_commit=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def app(postgres_url: str) -> Iterator[TestClient]:
    """Yield a ``TestClient`` for an app built without a worker.

    The session-factory dependency is pointed at ``postgres_url`` and both
    probes are replaced with stubs that report ``"ok"``.
    """
    application = create_app(spawn_worker=False)
    overrides = application.dependency_overrides
    overrides[get_session_factory_dep] = _factory_for_url(postgres_url)
    overrides[get_probes] = lambda: Probes(
        ollama=lambda: "ok", ocr=lambda: "ok"
    )
    with TestClient(application) as client:
        yield client
|
||||||
|
|
||||||
|
|
||||||
|
class TestIndexPage:
|
||||||
|
def test_index_returns_html(self, app: TestClient) -> None:
|
||||||
|
resp = app.get("/ui")
|
||||||
|
assert resp.status_code == 200
|
||||||
|
assert "text/html" in resp.headers["content-type"]
|
||||||
|
body = resp.text
|
||||||
|
# Dropdown prefilled with the registered use case.
|
||||||
|
assert "bank_statement_header" in body
|
||||||
|
# Marker for the submission form.
|
||||||
|
assert '<form' in body
|
||||||
|
|
||||||
|
def test_static_mount_is_reachable(self, app: TestClient) -> None:
|
||||||
|
# Probe the .gitkeep placeholder through the /ui/static mount; if the
|
||||||
|
# mount were missing, the router would answer 404 for this path.
|
||||||
|
resp = app.get("/ui/static/.gitkeep")
|
||||||
|
# The expected status is 200 when .gitkeep is present. The assertion is
|
||||||
|
# deliberately loose: any non-404 status (e.g. 403/405) still shows the
|
||||||
|
# mount is wired, so only a 404 fails the test.
|
||||||
|
assert resp.status_code != 404
|
||||||
|
|
||||||
|
|
||||||
|
class TestSubmitJobRegistered:
    """``POST /ui/jobs`` with ``use_case_mode=registered``."""

    def test_post_registered_use_case_creates_row(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        """A full form submission stores a pending job that points at a local file."""
        request_id = f"ui-reg-{uuid4().hex[:8]}"
        form_fields = {
            "use_case_mode": "registered",
            "use_case_name": "bank_statement_header",
            "ix_client_id": "ui-test",
            "request_id": request_id,
            "texts": "",
            "use_ocr": "on",
            "include_provenance": "on",
            "max_sources_per_field": "10",
        }
        with FIXTURE_PDF.open("rb") as pdf_handle:
            response = app.post(
                "/ui/jobs",
                data=form_fields,
                files={"pdf": ("sample.pdf", pdf_handle, "application/pdf")},
                follow_redirects=False,
            )
        assert response.status_code in (200, 303), response.text

        # The submission must have produced a row keyed by (client, request).
        stored_job = _find_job(postgres_url, "ui-test", request_id)
        assert stored_job is not None
        assert stored_job.status == "pending"
        assert stored_job.request["use_case"] == "bank_statement_header"

        # Exactly one context file, referenced through a file:// URL. The
        # entry may be a bare string or a mapping carrying a "url" key.
        context_files = stored_job.request["context"]["files"]
        assert len(context_files) == 1
        first_entry = context_files[0]
        if isinstance(first_entry, str):
            file_url = first_entry
        else:
            file_url = first_entry["url"]
        assert file_url.startswith("file://")

    def test_htmx_submit_uses_hx_redirect_header(
        self,
        app: TestClient,
    ) -> None:
        """With ``HX-Request: true`` the reply is 200 and carries ``HX-Redirect``."""
        request_id = f"ui-htmx-{uuid4().hex[:8]}"
        form_fields = {
            "use_case_mode": "registered",
            "use_case_name": "bank_statement_header",
            "ix_client_id": "ui-test",
            "request_id": request_id,
        }
        with FIXTURE_PDF.open("rb") as pdf_handle:
            response = app.post(
                "/ui/jobs",
                data=form_fields,
                files={"pdf": ("sample.pdf", pdf_handle, "application/pdf")},
                headers={"HX-Request": "true"},
                follow_redirects=False,
            )
        assert response.status_code == 200
        assert "HX-Redirect" in response.headers
|
||||||
|
|
||||||
|
|
||||||
|
class TestSubmitJobCustom:
    """``POST /ui/jobs`` with ``use_case_mode=custom`` (inline use-case definition)."""

    def test_post_custom_use_case_stores_inline(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        """A valid custom definition is persisted under ``use_case_inline``."""
        request_id = f"ui-cust-{uuid4().hex[:8]}"
        field_specs = [
            {"name": "vendor", "type": "str", "required": True},
            {"name": "total", "type": "decimal"},
        ]
        form_fields = {
            "use_case_mode": "custom",
            "use_case_name": "invoice_adhoc",
            "ix_client_id": "ui-test",
            "request_id": request_id,
            "system_prompt": "Extract vendor and total.",
            "default_model": "qwen3:14b",
            "fields_json": json.dumps(field_specs),
        }
        with FIXTURE_PDF.open("rb") as pdf_handle:
            response = app.post(
                "/ui/jobs",
                data=form_fields,
                files={"pdf": ("sample.pdf", pdf_handle, "application/pdf")},
                follow_redirects=False,
            )
        assert response.status_code in (200, 303), response.text

        stored_job = _find_job(postgres_url, "ui-test", request_id)
        assert stored_job is not None

        inline = stored_job.request["use_case_inline"]
        assert inline is not None
        assert inline["use_case_name"] == "invoice_adhoc"
        assert inline["system_prompt"] == "Extract vendor and total."
        # Field order must match the submitted JSON.
        assert [spec["name"] for spec in inline["fields"]] == ["vendor", "total"]

    def test_post_malformed_fields_json_rejected(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        """Unparseable ``fields_json`` must not create a job row."""
        request_id = f"ui-bad-{uuid4().hex[:8]}"
        form_fields = {
            "use_case_mode": "custom",
            "use_case_name": "adhoc_bad",
            "ix_client_id": "ui-test",
            "request_id": request_id,
            "system_prompt": "p",
            "fields_json": "this is not json",
        }
        with FIXTURE_PDF.open("rb") as pdf_handle:
            response = app.post(
                "/ui/jobs",
                data=form_fields,
                files={"pdf": ("sample.pdf", pdf_handle, "application/pdf")},
                follow_redirects=False,
            )

        # The handler may answer with a re-rendered form (200) or a client
        # error (400 / 422). What matters is that nothing was inserted.
        assert response.status_code in (200, 400, 422)
        assert _find_job(postgres_url, "ui-test", request_id) is None

        # Some hint about the failure should appear in the body.
        lowered_body = response.text.lower()
        assert any(
            marker in lowered_body for marker in ("error", "invalid", "json")
        )
|
||||||
|
|
||||||
|
|
||||||
|
class TestFragment:
    """Tests for ``GET /ui/jobs/{job_id}/fragment``, the job status fragment."""

    @staticmethod
    def _submit_registered_job(  # type: ignore[no-untyped-def]
        app: TestClient,
        postgres_url: str,
        request_id: str,
    ):
        """Submit the fixture PDF as a registered-use-case job and return its row.

        Posts to ``/ui/jobs`` as client ``ui-test`` and then looks the row up
        via ``_find_job``; fails the calling test if no row was created.
        """
        with FIXTURE_PDF.open("rb") as fh:
            app.post(
                "/ui/jobs",
                data={
                    "use_case_mode": "registered",
                    "use_case_name": "bank_statement_header",
                    "ix_client_id": "ui-test",
                    "request_id": request_id,
                },
                files={"pdf": ("sample.pdf", fh, "application/pdf")},
                follow_redirects=False,
            )
        job_row = _find_job(postgres_url, "ui-test", request_id)
        assert job_row is not None
        return job_row

    def test_fragment_pending_has_trigger(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        """A freshly submitted job renders a fragment that keeps polling."""
        request_id = f"ui-frag-p-{uuid4().hex[:8]}"
        job_row = self._submit_registered_job(app, postgres_url, request_id)

        resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
        assert resp.status_code == 200
        body = resp.text
        # Pending → auto-refresh every 2s.
        assert "hx-trigger" in body
        assert "2s" in body
        assert "pending" in body.lower() or "running" in body.lower()

    def test_fragment_done_shows_pretty_json(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        """A finished job renders its result and stops polling."""
        request_id = f"ui-frag-d-{uuid4().hex[:8]}"
        job_row = self._submit_registered_job(app, postgres_url, request_id)

        # Hand-tick the row to done with a fake response.
        _force_done(
            postgres_url,
            job_row.job_id,
            response_body={
                "use_case": "bank_statement_header",
                "ix_result": {"result": {"bank_name": "UBS AG", "currency": "CHF"}},
            },
        )

        resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
        assert resp.status_code == 200
        body = resp.text
        # Terminal → no auto-refresh. (The original assertion repeated this
        # exact operand twice; one check is equivalent.)
        assert "every 2s" not in body
        # Values from the stored response must be rendered.
        assert "UBS AG" in body
        assert "CHF" in body
|
||||||
|
|
||||||
|
|
||||||
|
def _find_job(postgres_url: str, client_id: str, request_id: str):  # type: ignore[no-untyped-def]
    """Synchronously fetch the ``ix_jobs`` row for ``(client_id, request_id)``.

    The async query is driven to completion with ``asyncio.run`` on a
    throwaway engine, which is disposed before this function returns.

    Returns ``None`` when no row matches. Otherwise returns a plain attribute
    holder exposing ``job_id``, ``client_id``, ``request_id``, ``status`` and
    ``request``; ``request`` is decoded with ``json.loads`` when the column
    value is a string and passed through unchanged otherwise.
    """

    import asyncio
    import json as _json

    class _JobRow:
        pass

    async def _lookup():  # type: ignore[no-untyped-def]
        engine = create_async_engine(postgres_url)
        session_factory = async_sessionmaker(engine, expire_on_commit=False)
        try:
            async with session_factory() as session:
                query = select(IxJob).where(
                    IxJob.client_id == client_id,
                    IxJob.request_id == request_id,
                )
                row = await session.scalar(query)
                if row is None:
                    return None

                # Copy the values onto a plain object for the caller.
                snapshot = _JobRow()
                for attr in ("job_id", "client_id", "request_id", "status"):
                    setattr(snapshot, attr, getattr(row, attr))
                raw_request = row.request
                snapshot.request = (
                    _json.loads(raw_request)
                    if isinstance(raw_request, str)
                    else raw_request
                )
                return snapshot
        finally:
            await engine.dispose()

    return asyncio.run(_lookup())
|
||||||
|
|
||||||
|
|
||||||
|
def _force_done(
    postgres_url: str,
    job_id,  # type: ignore[no-untyped-def]
    response_body: dict,
) -> None:
    """Mark job ``job_id`` as ``done`` and store ``response_body`` as its response.

    Runs a single ``UPDATE`` on ``ix_jobs`` that sets ``status`` to
    ``'done'``, ``response`` to the JSON-serialised ``response_body`` (cast to
    ``JSONB``) and ``finished_at`` to the current UTC time. The statement
    matches on ``job_id`` only; it does not check the row's current status.

    The coroutine is driven with ``asyncio.run`` on a throwaway engine that is
    disposed afterwards.
    """

    import asyncio
    from datetime import UTC, datetime

    from sqlalchemy import text

    mark_done = text(
        "UPDATE ix_jobs SET status='done', "
        "response=CAST(:resp AS JSONB), finished_at=:now "
        "WHERE job_id=:jid"
    )

    async def _apply():  # type: ignore[no-untyped-def]
        engine = create_async_engine(postgres_url)
        try:
            # ``begin()`` opens a transaction that commits on clean exit.
            async with engine.begin() as conn:
                bind_params = {
                    "resp": json.dumps(response_body),
                    "now": datetime.now(UTC),
                    "jid": str(job_id),
                }
                await conn.execute(mark_done, bind_params)
        finally:
            await engine.dispose()

    asyncio.run(_apply())
|
||||||
24
uv.lock
24
uv.lock
|
|
@ -7,6 +7,15 @@ resolution-markers = [
|
||||||
"(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
"(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aiofiles"
|
||||||
|
version = "25.1.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/41/c3/534eac40372d8ee36ef40df62ec129bee4fdb5ad9706e58a29be53b2c970/aiofiles-25.1.0.tar.gz", hash = "sha256:a8d728f0a29de45dc521f18f07297428d56992a742f0cd2701ba86e44d23d5b2", size = 46354, upload-time = "2025-10-09T20:51:04.358Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/bc/8a/340a1555ae33d7354dbca4faa54948d76d89a27ceef032c8c3bc661d003e/aiofiles-25.1.0-py3-none-any.whl", hash = "sha256:abe311e527c862958650f9438e859c1fa7568a141b22abcd015e120e86a85695", size = 14668, upload-time = "2025-10-09T20:51:03.174Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "alembic"
|
name = "alembic"
|
||||||
version = "1.18.4"
|
version = "1.18.4"
|
||||||
|
|
@ -521,16 +530,19 @@ name = "infoxtractor"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
{ name = "aiofiles" },
|
||||||
{ name = "alembic" },
|
{ name = "alembic" },
|
||||||
{ name = "asyncpg" },
|
{ name = "asyncpg" },
|
||||||
{ name = "fastapi" },
|
{ name = "fastapi" },
|
||||||
{ name = "httpx" },
|
{ name = "httpx" },
|
||||||
|
{ name = "jinja2" },
|
||||||
{ name = "pillow" },
|
{ name = "pillow" },
|
||||||
{ name = "pydantic" },
|
{ name = "pydantic" },
|
||||||
{ name = "pydantic-settings" },
|
{ name = "pydantic-settings" },
|
||||||
{ name = "pymupdf" },
|
{ name = "pymupdf" },
|
||||||
{ name = "python-dateutil" },
|
{ name = "python-dateutil" },
|
||||||
{ name = "python-magic" },
|
{ name = "python-magic" },
|
||||||
|
{ name = "python-multipart" },
|
||||||
{ name = "sqlalchemy", extra = ["asyncio"] },
|
{ name = "sqlalchemy", extra = ["asyncio"] },
|
||||||
{ name = "uvicorn", extra = ["standard"] },
|
{ name = "uvicorn", extra = ["standard"] },
|
||||||
]
|
]
|
||||||
|
|
@ -550,10 +562,12 @@ ocr = [
|
||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
|
{ name = "aiofiles", specifier = ">=24.1" },
|
||||||
{ name = "alembic", specifier = ">=1.14" },
|
{ name = "alembic", specifier = ">=1.14" },
|
||||||
{ name = "asyncpg", specifier = ">=0.30" },
|
{ name = "asyncpg", specifier = ">=0.30" },
|
||||||
{ name = "fastapi", specifier = ">=0.115" },
|
{ name = "fastapi", specifier = ">=0.115" },
|
||||||
{ name = "httpx", specifier = ">=0.27" },
|
{ name = "httpx", specifier = ">=0.27" },
|
||||||
|
{ name = "jinja2", specifier = ">=3.1" },
|
||||||
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.13" },
|
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.13" },
|
||||||
{ name = "pillow", specifier = ">=10.2,<11.0" },
|
{ name = "pillow", specifier = ">=10.2,<11.0" },
|
||||||
{ name = "pydantic", specifier = ">=2.9" },
|
{ name = "pydantic", specifier = ">=2.9" },
|
||||||
|
|
@ -564,6 +578,7 @@ requires-dist = [
|
||||||
{ name = "pytest-httpx", marker = "extra == 'dev'", specifier = ">=0.32" },
|
{ name = "pytest-httpx", marker = "extra == 'dev'", specifier = ">=0.32" },
|
||||||
{ name = "python-dateutil", specifier = ">=2.9" },
|
{ name = "python-dateutil", specifier = ">=2.9" },
|
||||||
{ name = "python-magic", specifier = ">=0.4.27" },
|
{ name = "python-magic", specifier = ">=0.4.27" },
|
||||||
|
{ name = "python-multipart", specifier = ">=0.0.12" },
|
||||||
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8" },
|
{ name = "ruff", marker = "extra == 'dev'", specifier = ">=0.8" },
|
||||||
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.36" },
|
{ name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.36" },
|
||||||
{ name = "surya-ocr", marker = "extra == 'ocr'", specifier = ">=0.17,<0.18" },
|
{ name = "surya-ocr", marker = "extra == 'ocr'", specifier = ">=0.17,<0.18" },
|
||||||
|
|
@ -1350,6 +1365,15 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840, upload-time = "2022-06-07T20:16:57.763Z" },
|
{ url = "https://files.pythonhosted.org/packages/6c/73/9f872cb81fc5c3bb48f7227872c28975f998f3e7c2b1c16e95e6432bbb90/python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3", size = 13840, upload-time = "2022-06-07T20:16:57.763Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "python-multipart"
|
||||||
|
version = "0.0.26"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/88/71/b145a380824a960ebd60e1014256dbb7d2253f2316ff2d73dfd8928ec2c3/python_multipart-0.0.26.tar.gz", hash = "sha256:08fadc45918cd615e26846437f50c5d6d23304da32c341f289a617127b081f17", size = 43501, upload-time = "2026-04-10T14:09:59.473Z" }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/9a/22/f1925cdda983ab66fc8ec6ec8014b959262747e58bdca26a4e3d1da29d56/python_multipart-0.0.26-py3-none-any.whl", hash = "sha256:c0b169f8c4484c13b0dcf2ef0ec3a4adb255c4b7d18d8e420477d2b1dd03f185", size = 28847, upload-time = "2026-04-10T14:09:58.131Z" },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyyaml"
|
name = "pyyaml"
|
||||||
version = "6.0.3"
|
version = "6.0.3"
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue