infoxtractor/tests/integration/test_ui_routes.py
Dirk Riemann 673dc60178
All checks were successful
tests / test (push) Successful in 1m27s
tests / test (pull_request) Successful in 1m17s
feat(ui): add /ui/jobs listing page with filters + pagination
* `JobsRepo.list_recent` — paginated, filterable view over ix_jobs,
  newest first, returning (jobs, total) so the template can render
  "Showing N of M".
* `GET /ui/jobs` — filter bar (multi status + client_id), prev/next
  pagination, links to `/ui/jobs/{id}` per row. Surfaces filename from
  `FileRef.display_name` with URL-basename fallback for legacy rows.
* Persistent nav header gets a "Recent jobs" link on both `/ui` and the
  per-job page so users can tab between submit and history.
* Integration tests cover: ordering, status/client filters (single +
  multi), pagination, legacy fallback, header links.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 22:28:10 +02:00

792 lines
27 KiB
Python

"""Integration tests for the `/ui` router (spec §PR 2).
Covers the full round-trip through `POST /ui/jobs` — the handler parses
multipart form data into a `RequestIX` and hands it to
`ix.store.jobs_repo.insert_pending`, the same entry point the REST adapter
uses. Tests assert the job row exists with the right client/request ids and
that custom-use-case forms produce a `use_case_inline` block in the stored
request JSON.
The DB-touching tests depend on the shared integration conftest which
spins up migrations against the configured Postgres; the pure-template
tests (`GET /ui` and the fragment renderer) still need a factory but
won't actually query — they're cheap.
"""
from __future__ import annotations
import json
from collections.abc import Iterator
from pathlib import Path
from uuid import UUID, uuid4
import pytest
from fastapi.testclient import TestClient
from sqlalchemy import select
from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
from ix.adapters.rest.routes import Probes, get_probes, get_session_factory_dep
from ix.app import create_app
from ix.store.models import IxJob
FIXTURE_DIR = Path(__file__).resolve().parents[1] / "fixtures"
FIXTURE_PDF = FIXTURE_DIR / "synthetic_giro.pdf"
def _factory_for_url(postgres_url: str):  # type: ignore[no-untyped-def]
    """Return a zero-arg callable producing a session factory for *postgres_url*."""

    def _factory():  # type: ignore[no-untyped-def]
        engine = create_async_engine(postgres_url, pool_pre_ping=True)
        return async_sessionmaker(engine, expire_on_commit=False)

    return _factory
@pytest.fixture
def app(postgres_url: str) -> Iterator[TestClient]:
    """App instance wired to the test Postgres, with health probes stubbed to 'ok'."""
    application = create_app(spawn_worker=False)
    overrides = application.dependency_overrides
    overrides[get_session_factory_dep] = _factory_for_url(postgres_url)
    overrides[get_probes] = lambda: Probes(ollama=lambda: "ok", ocr=lambda: "ok")
    with TestClient(application) as client:
        yield client
class TestIndexPage:
    """Smoke tests for ``GET /ui`` and the static-asset mount."""

    def test_index_returns_html(self, app: TestClient) -> None:
        resp = app.get("/ui")
        assert resp.status_code == 200
        assert "text/html" in resp.headers["content-type"]
        page = resp.text
        # Dropdown prefilled with the registered use case.
        assert "bank_statement_header" in page
        # Marker for the submission form.
        assert '<form' in page

    def test_static_mount_is_reachable(self, app: TestClient) -> None:
        # StaticFiles returns 404 for the keepfile; the mount itself must
        # exist so asset URLs resolve. We probe the directory root instead.
        resp = app.get("/ui/static/.gitkeep")
        # .gitkeep exists in the repo — expect 200 (or at minimum not a 404
        # due to missing mount). A 405/403 would also indicate the mount is
        # wired; we assert the response is *not* a 404 from a missing route.
        assert resp.status_code != 404
class TestSubmitJobRegistered:
    """``POST /ui/jobs`` with a registered use case inserts a pending row."""

    def test_post_registered_use_case_creates_row(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        request_id = f"ui-reg-{uuid4().hex[:8]}"
        form = {
            "use_case_mode": "registered",
            "use_case_name": "bank_statement_header",
            "ix_client_id": "ui-test",
            "request_id": request_id,
            "texts": "",
            "use_ocr": "on",
            "include_provenance": "on",
            "max_sources_per_field": "10",
        }
        with FIXTURE_PDF.open("rb") as fh:
            resp = app.post(
                "/ui/jobs",
                data=form,
                files={"pdf": ("sample.pdf", fh, "application/pdf")},
                follow_redirects=False,
            )
        assert resp.status_code in (200, 303), resp.text
        # Assert the row exists in the DB.
        job_row = _find_job(postgres_url, "ui-test", request_id)
        assert job_row is not None
        assert job_row.status == "pending"
        assert job_row.request["use_case"] == "bank_statement_header"
        # Context.files must reference a local file:// path.
        files = job_row.request["context"]["files"]
        assert len(files) == 1
        entry = files[0]
        url = entry if isinstance(entry, str) else entry["url"]
        assert url.startswith("file://")

    def test_htmx_submit_uses_hx_redirect_header(
        self,
        app: TestClient,
    ) -> None:
        request_id = f"ui-htmx-{uuid4().hex[:8]}"
        with FIXTURE_PDF.open("rb") as fh:
            resp = app.post(
                "/ui/jobs",
                data={
                    "use_case_mode": "registered",
                    "use_case_name": "bank_statement_header",
                    "ix_client_id": "ui-test",
                    "request_id": request_id,
                },
                files={"pdf": ("sample.pdf", fh, "application/pdf")},
                headers={"HX-Request": "true"},
                follow_redirects=False,
            )
        # htmx clients get 200 + an HX-Redirect header rather than a 303.
        assert resp.status_code == 200
        assert "HX-Redirect" in resp.headers
class TestSubmitJobCustom:
    """``POST /ui/jobs`` in custom mode stores a ``use_case_inline`` block."""

    def test_post_custom_use_case_stores_inline(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        request_id = f"ui-cust-{uuid4().hex[:8]}"
        fields_json = json.dumps(
            [
                {"name": "vendor", "type": "str", "required": True},
                {"name": "total", "type": "decimal"},
            ]
        )
        with FIXTURE_PDF.open("rb") as fh:
            resp = app.post(
                "/ui/jobs",
                data={
                    "use_case_mode": "custom",
                    "use_case_name": "invoice_adhoc",
                    "ix_client_id": "ui-test",
                    "request_id": request_id,
                    "system_prompt": "Extract vendor and total.",
                    "default_model": "qwen3:14b",
                    "fields_json": fields_json,
                },
                files={"pdf": ("sample.pdf", fh, "application/pdf")},
                follow_redirects=False,
            )
        assert resp.status_code in (200, 303), resp.text
        job_row = _find_job(postgres_url, "ui-test", request_id)
        assert job_row is not None
        stored = job_row.request["use_case_inline"]
        assert stored is not None
        assert stored["use_case_name"] == "invoice_adhoc"
        assert stored["system_prompt"] == "Extract vendor and total."
        names = [f["name"] for f in stored["fields"]]
        assert names == ["vendor", "total"]

    def test_post_malformed_fields_json_rejected(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        request_id = f"ui-bad-{uuid4().hex[:8]}"
        with FIXTURE_PDF.open("rb") as fh:
            resp = app.post(
                "/ui/jobs",
                data={
                    "use_case_mode": "custom",
                    "use_case_name": "adhoc_bad",
                    "ix_client_id": "ui-test",
                    "request_id": request_id,
                    "system_prompt": "p",
                    "fields_json": "this is not json",
                },
                files={"pdf": ("sample.pdf", fh, "application/pdf")},
                follow_redirects=False,
            )
        # Either re-rendered form (422 / 200 with error) — what matters is
        # that no row was inserted.
        assert resp.status_code in (200, 400, 422)
        assert _find_job(postgres_url, "ui-test", request_id) is None
        # A helpful error should appear somewhere in the body.
        lowered = resp.text.lower()
        assert "error" in lowered or "invalid" in lowered or "json" in lowered
class TestDisplayName:
    """Propagation of the upload filename into ``FileRef.display_name``."""

    def test_post_persists_display_name_in_file_ref(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        """The client-provided upload filename lands in FileRef.display_name."""
        request_id = f"ui-name-{uuid4().hex[:8]}"
        with FIXTURE_PDF.open("rb") as fh:
            resp = app.post(
                "/ui/jobs",
                data={
                    "use_case_mode": "registered",
                    "use_case_name": "bank_statement_header",
                    "ix_client_id": "ui-test",
                    "request_id": request_id,
                },
                files={"pdf": ("my statement.pdf", fh, "application/pdf")},
                follow_redirects=False,
            )
        assert resp.status_code in (200, 303), resp.text
        job_row = _find_job(postgres_url, "ui-test", request_id)
        assert job_row is not None
        first_file = job_row.request["context"]["files"][0]
        assert isinstance(first_file, dict)
        assert first_file["display_name"] == "my statement.pdf"
class TestFragment:
    """Tests for the status-fragment renderer (``GET /ui/jobs/{id}/fragment``).

    Fix vs. original: the terminal-state test asserted
    ``"every 2s" not in body and "every 2s" not in body`` — the second
    conjunct duplicated the first and was dead; deduplicated below.
    A ``_submit`` helper (mirroring ``TestJobsListPage._submit``) replaces the
    four copy-pasted multipart POST blocks.
    """

    def _submit(
        self,
        app: TestClient,
        request_id: str,
        filename: str = "sample.pdf",
    ) -> None:
        """Post a registered-use-case job as client ``ui-test``."""
        with FIXTURE_PDF.open("rb") as fh:
            app.post(
                "/ui/jobs",
                data={
                    "use_case_mode": "registered",
                    "use_case_name": "bank_statement_header",
                    "ix_client_id": "ui-test",
                    "request_id": request_id,
                },
                files={"pdf": (filename, fh, "application/pdf")},
                follow_redirects=False,
            )

    def test_fragment_pending_has_trigger(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        request_id = f"ui-frag-p-{uuid4().hex[:8]}"
        self._submit(app, request_id)
        job_row = _find_job(postgres_url, "ui-test", request_id)
        assert job_row is not None
        resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
        assert resp.status_code == 200
        body = resp.text
        # Pending → auto-refresh every 2s.
        assert "hx-trigger" in body
        assert "2s" in body
        assert "pending" in body.lower() or "running" in body.lower()
        # New queue-awareness copy.
        assert "Queue position" in body or "About to start" in body

    def test_fragment_pending_shows_filename(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        request_id = f"ui-frag-pf-{uuid4().hex[:8]}"
        self._submit(app, request_id, filename="client-side-name.pdf")
        job_row = _find_job(postgres_url, "ui-test", request_id)
        assert job_row is not None
        resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
        assert resp.status_code == 200
        assert "client-side-name.pdf" in resp.text

    def test_fragment_running_shows_elapsed(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        """After flipping a row to running with a backdated started_at, the
        fragment renders a ``Running for MM:SS`` line."""
        import re

        request_id = f"ui-frag-r-{uuid4().hex[:8]}"
        self._submit(app, request_id)
        job_row = _find_job(postgres_url, "ui-test", request_id)
        assert job_row is not None
        _force_running(postgres_url, job_row.job_id)
        resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
        assert resp.status_code == 200
        body = resp.text
        assert "Running for" in body
        # MM:SS; our backdate is ~10s so expect 00:1? or higher.
        assert re.search(r"\d{2}:\d{2}", body), body

    def test_fragment_backward_compat_no_display_name(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        """Older rows (stored before display_name existed) must still render."""
        import asyncio

        from ix.contracts.request import Context, FileRef, RequestIX
        from ix.store import jobs_repo as _repo

        legacy_req = RequestIX(
            use_case="bank_statement_header",
            ix_client_id="ui-test",
            request_id=f"ui-legacy-{uuid4().hex[:8]}",
            context=Context(
                files=[FileRef(url="file:///tmp/ix/ui/legacy.pdf")]
            ),
        )

        async def _insert() -> UUID:
            # Insert via the repo directly (bypassing the UI form) so the
            # stored FileRef has no display_name, emulating a legacy row.
            eng = create_async_engine(postgres_url)
            sf = async_sessionmaker(eng, expire_on_commit=False)
            try:
                async with sf() as session:
                    job = await _repo.insert_pending(
                        session, legacy_req, callback_url=None
                    )
                    await session.commit()
                    return job.job_id
            finally:
                await eng.dispose()

        job_id = asyncio.run(_insert())
        resp = app.get(f"/ui/jobs/{job_id}/fragment")
        assert resp.status_code == 200
        body = resp.text
        # Must not crash; must include the fallback basename from the URL.
        assert "legacy.pdf" in body

    def test_fragment_done_shows_pretty_json(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        request_id = f"ui-frag-d-{uuid4().hex[:8]}"
        self._submit(app, request_id, filename="my-done-doc.pdf")
        job_row = _find_job(postgres_url, "ui-test", request_id)
        assert job_row is not None
        # Hand-tick the row to done with a fake response.
        _force_done(
            postgres_url,
            job_row.job_id,
            response_body={
                "use_case": "bank_statement_header",
                "ix_result": {"result": {"bank_name": "UBS AG", "currency": "CHF"}},
            },
        )
        resp = app.get(f"/ui/jobs/{job_row.job_id}/fragment")
        assert resp.status_code == 200
        body = resp.text
        # Terminal → no auto-refresh (deduplicated from the original's
        # repeated identical clause).
        assert "every 2s" not in body
        # JSON present.
        assert "UBS AG" in body
        assert "CHF" in body
        # Filename surfaced on the done fragment.
        assert "my-done-doc.pdf" in body
class TestJobsListPage:
    """Tests for the ``GET /ui/jobs`` listing page (feat/ui-jobs-list)."""

    def _submit(
        self,
        app: TestClient,
        client_id: str,
        request_id: str,
        filename: str = "sample.pdf",
    ) -> None:
        """Post a registered-use-case job on behalf of *client_id*."""
        with FIXTURE_PDF.open("rb") as fh:
            app.post(
                "/ui/jobs",
                data={
                    "use_case_mode": "registered",
                    "use_case_name": "bank_statement_header",
                    "ix_client_id": client_id,
                    "request_id": request_id,
                },
                files={"pdf": (filename, fh, "application/pdf")},
                follow_redirects=False,
            )

    def test_jobs_list_returns_html(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        for idx in range(3):
            self._submit(
                app,
                "ui-list",
                f"lp-{uuid4().hex[:6]}-{idx}",
                filename=f"doc-{idx}.pdf",
            )
        resp = app.get("/ui/jobs")
        assert resp.status_code == 200
        assert "text/html" in resp.headers["content-type"]
        page = resp.text
        # Breadcrumb / header shows "Jobs".
        assert "Jobs" in page
        # display_name surfaces for each row.
        for idx in range(3):
            assert f"doc-{idx}.pdf" in page
        # Showing N of M counter present.
        assert "Showing" in page
        assert "of" in page

    def test_jobs_list_links_to_job_detail(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        rid = f"lp-link-{uuid4().hex[:6]}"
        self._submit(app, "ui-list", rid)
        row = _find_job(postgres_url, "ui-list", rid)
        assert row is not None
        resp = app.get("/ui/jobs")
        assert resp.status_code == 200
        # Each row links through to its per-job detail page.
        assert f"/ui/jobs/{row.job_id}" in resp.text

    def test_jobs_list_status_filter_single(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        # Create two jobs, flip one to done.
        rid_pending = f"lp-p-{uuid4().hex[:6]}"
        rid_done = f"lp-d-{uuid4().hex[:6]}"
        self._submit(app, "ui-filt", rid_pending, filename="pending-doc.pdf")
        self._submit(app, "ui-filt", rid_done, filename="done-doc.pdf")
        done_row = _find_job(postgres_url, "ui-filt", rid_done)
        assert done_row is not None
        _force_done(
            postgres_url,
            done_row.job_id,
            response_body={"use_case": "bank_statement_header"},
        )
        # ?status=done → only done row shown.
        resp = app.get("/ui/jobs?status=done")
        assert resp.status_code == 200
        assert "done-doc.pdf" in resp.text
        assert "pending-doc.pdf" not in resp.text

    def test_jobs_list_status_filter_multi(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        rid_p = f"lp-mp-{uuid4().hex[:6]}"
        rid_d = f"lp-md-{uuid4().hex[:6]}"
        rid_e = f"lp-me-{uuid4().hex[:6]}"
        self._submit(app, "ui-multi", rid_p, filename="pending-m.pdf")
        self._submit(app, "ui-multi", rid_d, filename="done-m.pdf")
        self._submit(app, "ui-multi", rid_e, filename="error-m.pdf")
        done_row = _find_job(postgres_url, "ui-multi", rid_d)
        err_row = _find_job(postgres_url, "ui-multi", rid_e)
        assert done_row is not None and err_row is not None
        _force_done(
            postgres_url,
            done_row.job_id,
            response_body={"use_case": "bank_statement_header"},
        )
        _force_error(postgres_url, err_row.job_id)
        # Repeated ?status= params act as an OR filter.
        resp = app.get("/ui/jobs?status=done&status=error")
        assert resp.status_code == 200
        page = resp.text
        assert "done-m.pdf" in page
        assert "error-m.pdf" in page
        assert "pending-m.pdf" not in page

    def test_jobs_list_client_id_filter(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        rid_a = f"lp-a-{uuid4().hex[:6]}"
        rid_b = f"lp-b-{uuid4().hex[:6]}"
        self._submit(app, "client-alpha", rid_a, filename="alpha.pdf")
        self._submit(app, "client-beta", rid_b, filename="beta.pdf")
        resp = app.get("/ui/jobs?client_id=client-alpha")
        assert resp.status_code == 200
        page = resp.text
        assert "alpha.pdf" in page
        assert "beta.pdf" not in page

    def test_jobs_list_pagination(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        for idx in range(7):
            self._submit(
                app,
                "ui-pg",
                f"lp-pg-{uuid4().hex[:6]}-{idx}",
                filename=f"pg-{idx}.pdf",
            )
        resp_p1 = app.get("/ui/jobs?limit=5&offset=0&client_id=ui-pg")
        assert resp_p1.status_code == 200
        body_p1 = resp_p1.text
        # Newest-first: last 5 uploaded are pg-6..pg-2.
        for idx in (2, 3, 4, 5, 6):
            assert f"pg-{idx}.pdf" in body_p1
        assert "pg-1.pdf" not in body_p1
        assert "pg-0.pdf" not in body_p1
        resp_p2 = app.get("/ui/jobs?limit=5&offset=5&client_id=ui-pg")
        assert resp_p2.status_code == 200
        body_p2 = resp_p2.text
        assert "pg-1.pdf" in body_p2
        assert "pg-0.pdf" in body_p2
        # Showing 2 of 7 on page 2.
        assert "of 7" in body_p2

    def test_jobs_list_missing_display_name_falls_back_to_basename(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        """Legacy rows without display_name must still render via basename."""
        import asyncio

        from ix.contracts.request import Context, FileRef, RequestIX
        from ix.store import jobs_repo as _repo

        legacy_req = RequestIX(
            use_case="bank_statement_header",
            ix_client_id="ui-legacy",
            request_id=f"lp-legacy-{uuid4().hex[:6]}",
            context=Context(
                files=[FileRef(url="file:///tmp/ix/ui/listing-legacy.pdf")]
            ),
        )

        async def _insert() -> UUID:
            eng = create_async_engine(postgres_url)
            sf = async_sessionmaker(eng, expire_on_commit=False)
            try:
                async with sf() as session:
                    job = await _repo.insert_pending(
                        session, legacy_req, callback_url=None
                    )
                    await session.commit()
                    return job.job_id
            finally:
                await eng.dispose()

        asyncio.run(_insert())
        resp = app.get("/ui/jobs?client_id=ui-legacy")
        assert resp.status_code == 200
        assert "listing-legacy.pdf" in resp.text

    def test_jobs_list_header_link_from_index(
        self,
        app: TestClient,
    ) -> None:
        resp = app.get("/ui")
        assert resp.status_code == 200
        assert 'href="/ui/jobs"' in resp.text

    def test_jobs_list_header_link_from_detail(
        self,
        app: TestClient,
        postgres_url: str,
    ) -> None:
        rid = f"lp-hd-{uuid4().hex[:6]}"
        self._submit(app, "ui-hd", rid)
        row = _find_job(postgres_url, "ui-hd", rid)
        assert row is not None
        resp = app.get(f"/ui/jobs/{row.job_id}")
        assert resp.status_code == 200
        assert 'href="/ui/jobs"' in resp.text
def _force_error(
    postgres_url: str,
    job_id,  # type: ignore[no-untyped-def]
) -> None:
    """Flip a pending/running job to ``error`` with a canned error body."""
    import asyncio
    from datetime import UTC, datetime

    from sqlalchemy import text

    stmt = text(
        "UPDATE ix_jobs SET status='error', "
        "response=CAST(:resp AS JSONB), finished_at=:now "
        "WHERE job_id=:jid"
    )
    params = {
        "resp": json.dumps({"error": "IX_002_000: forced"}),
        "now": datetime.now(UTC),
        "jid": str(job_id),
    }

    async def _go():  # type: ignore[no-untyped-def]
        eng = create_async_engine(postgres_url)
        try:
            async with eng.begin() as conn:
                await conn.execute(stmt, params)
        finally:
            await eng.dispose()

    asyncio.run(_go())
def _find_job(postgres_url: str, client_id: str, request_id: str):  # type: ignore[no-untyped-def]
    """Look up an ``ix_jobs`` row by (client_id, request_id).

    Returns a detached attribute bag (``job_id`` / ``client_id`` /
    ``request_id`` / ``status`` / ``request``) so callers can assert after the
    session is gone, or ``None`` when no row matches. The ``request`` JSON
    column is normalised to a dict even if the driver hands back a string.

    Fix vs. original: the hand-rolled empty ``class _JobRow`` plus attribute
    poking is replaced by ``types.SimpleNamespace``; the redundant local
    ``import json as _json`` is dropped (``json`` is already imported at
    module level).
    """
    import asyncio
    from types import SimpleNamespace

    async def _go():  # type: ignore[no-untyped-def]
        eng = create_async_engine(postgres_url)
        sf = async_sessionmaker(eng, expire_on_commit=False)
        try:
            async with sf() as session:
                row = await session.scalar(
                    select(IxJob).where(
                        IxJob.client_id == client_id,
                        IxJob.request_id == request_id,
                    )
                )
                if row is None:
                    return None
                # Some drivers return JSONB as str; normalise to dict.
                request = (
                    json.loads(row.request)
                    if isinstance(row.request, str)
                    else row.request
                )
                return SimpleNamespace(
                    job_id=row.job_id,
                    client_id=row.client_id,
                    request_id=row.request_id,
                    status=row.status,
                    request=request,
                )
        finally:
            await eng.dispose()

    return asyncio.run(_go())
def _force_done(
    postgres_url: str,
    job_id,  # type: ignore[no-untyped-def]
    response_body: dict,
) -> None:
    """Flip a pending job to ``done`` with the given response payload."""
    import asyncio
    from datetime import UTC, datetime

    from sqlalchemy import text

    stmt = text(
        "UPDATE ix_jobs SET status='done', "
        "response=CAST(:resp AS JSONB), finished_at=:now "
        "WHERE job_id=:jid"
    )
    params = {
        "resp": json.dumps(response_body),
        "now": datetime.now(UTC),
        "jid": str(job_id),
    }

    async def _go():  # type: ignore[no-untyped-def]
        eng = create_async_engine(postgres_url)
        try:
            async with eng.begin() as conn:
                await conn.execute(stmt, params)
        finally:
            await eng.dispose()

    asyncio.run(_go())
def _force_running(
    postgres_url: str,
    job_id,  # type: ignore[no-untyped-def]
    seconds_ago: int = 10,
) -> None:
    """Flip a pending job to ``running`` with a backdated ``started_at``.

    The fragment renders "Running for MM:SS" which needs a ``started_at`` in
    the past; 10s is enough to produce a deterministic non-zero MM:SS.
    """
    import asyncio
    from datetime import UTC, datetime, timedelta

    from sqlalchemy import text

    started_at = datetime.now(UTC) - timedelta(seconds=seconds_ago)

    async def _go():  # type: ignore[no-untyped-def]
        eng = create_async_engine(postgres_url)
        try:
            async with eng.begin() as conn:
                await conn.execute(
                    text(
                        "UPDATE ix_jobs SET status='running', started_at=:t "
                        "WHERE job_id=:jid"
                    ),
                    {"t": started_at, "jid": str(job_id)},
                )
        finally:
            await eng.dispose()

    asyncio.run(_go())