# infoxtractor/tests/integration/test_jobs_repo.py
#
# Commit 673dc60178 (Dirk Riemann) — all checks successful:
#   tests / test (push)         1m27s
#   tests / test (pull_request) 1m17s
#
# feat(ui): add /ui/jobs listing page with filters + pagination
#
# * `JobsRepo.list_recent` — paginated, filterable view over ix_jobs,
#   newest first, returning (jobs, total) so the template can render
#   "Showing N of M".
# * `GET /ui/jobs` — filter bar (multi status + client_id), prev/next
#   pagination, links to `/ui/jobs/{id}` per row. Surfaces filename from
#   `FileRef.display_name` with URL-basename fallback for legacy rows.
# * Persistent nav header gets a "Recent jobs" link on both `/ui` and the
#   per-job page so users can tab between submit and history.
# * Integration tests cover: ordering, status/client filters (single +
#   multi), pagination, legacy fallback, header links.
#
# Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
# 2026-04-18 22:28:10 +02:00
"""Integration tests for :mod:`ix.store.jobs_repo` — run against a real DB.
Every test exercises one repo method end-to-end. A few go further and
concurrently spin up two sessions to demonstrate the claim query behaves
correctly under ``SKIP LOCKED`` (two claimers should never see the same row).
Skipped cleanly when no Postgres is configured — see integration/conftest.py.
"""
from __future__ import annotations

import asyncio
from datetime import UTC, datetime, timedelta
from typing import TYPE_CHECKING
from uuid import UUID, uuid4

from ix.contracts.request import Context, RequestIX
from ix.contracts.response import ResponseIX
from ix.store import jobs_repo

if TYPE_CHECKING:
    from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
def _make_request(client: str = "mammon", request_id: str = "r-1") -> RequestIX:
    """Build the minimal valid request every test here reuses.

    Only the correlation pair (client, request_id) varies per test; the
    use case and context payload are fixed.
    """
    ctx = Context(texts=["hello"])
    return RequestIX(
        use_case="bank_statement_header",
        ix_client_id=client,
        request_id=request_id,
        context=ctx,
    )
async def test_insert_pending_creates_row_and_assigns_ix_id(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """A fresh insert comes back pending, with ids and zeroed attempt count."""
    async with session_factory() as session:
        job = await jobs_repo.insert_pending(
            session, _make_request(), callback_url=None
        )
        await session.commit()
        assert job.status == "pending"
        assert isinstance(job.job_id, UUID)
        # ix_id is a 16-hex string per spec §3 — transport-assigned.
        assert isinstance(job.ix_id, str)
        assert len(job.ix_id) == 16
        assert set(job.ix_id) <= set("0123456789abcdef")
        assert job.attempts == 0
async def test_insert_pending_is_idempotent_on_correlation_key(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """(client_id, request_id) collides → existing row comes back unchanged."""
    async with session_factory() as session:
        original = await jobs_repo.insert_pending(
            session, _make_request("mammon", "same-id"), callback_url="http://x/cb"
        )
        await session.commit()
    async with session_factory() as session:
        duplicate = await jobs_repo.insert_pending(
            session, _make_request("mammon", "same-id"), callback_url="http://y/cb"
        )
        await session.commit()
        assert duplicate.job_id == original.job_id
        assert duplicate.ix_id == original.ix_id
        # The callback_url of the FIRST insert wins — we don't overwrite.
        assert duplicate.callback_url == "http://x/cb"
async def test_get_returns_full_job(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """get() round-trips the persisted job, including the stored request."""
    async with session_factory() as session:
        created = await jobs_repo.insert_pending(
            session, _make_request(), callback_url=None
        )
        await session.commit()
    async with session_factory() as session:
        loaded = await jobs_repo.get(session, created.job_id)
        assert loaded is not None
        assert loaded.job_id == created.job_id
        assert loaded.request.use_case == "bank_statement_header"
        assert loaded.status == "pending"
async def test_get_unknown_id_returns_none(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """Looking up an id that was never inserted yields None, not an error."""
    async with session_factory() as session:
        assert await jobs_repo.get(session, uuid4()) is None
async def test_get_by_correlation(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """Lookup by (client_id, request_id): known pair hits, unknown pair → None."""
    async with session_factory() as session:
        created = await jobs_repo.insert_pending(
            session, _make_request("mammon", "req-42"), callback_url=None
        )
        await session.commit()
    async with session_factory() as session:
        hit = await jobs_repo.get_by_correlation(session, "mammon", "req-42")
        assert hit is not None
        assert hit.job_id == created.job_id
    async with session_factory() as session:
        miss = await jobs_repo.get_by_correlation(session, "mammon", "nope")
        assert miss is None
async def test_claim_next_pending_advances_status(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """Claiming flips the oldest pending row to running and stamps started_at."""
    async with session_factory() as session:
        created = await jobs_repo.insert_pending(
            session, _make_request(), callback_url=None
        )
        await session.commit()
    async with session_factory() as session:
        picked = await jobs_repo.claim_next_pending(session)
        await session.commit()
        assert picked is not None
        assert picked.job_id == created.job_id
        assert picked.status == "running"
        assert picked.started_at is not None
async def test_claim_next_pending_returns_none_when_empty(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """With nothing pending there is nothing to claim — the repo says None."""
    async with session_factory() as session:
        picked = await jobs_repo.claim_next_pending(session)
        await session.commit()
        assert picked is None
async def test_claim_next_pending_skips_locked(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """Two concurrent claimers pick different rows (SKIP LOCKED in action)."""
    async with session_factory() as session:
        a = await jobs_repo.insert_pending(
            session, _make_request("c", "a"), callback_url=None
        )
        b = await jobs_repo.insert_pending(
            session, _make_request("c", "b"), callback_url=None
        )
        await session.commit()
    # Two sessions opened by hand (not `async with`) so BOTH transactions
    # can be held open at the same time — that overlap is the whole test.
    session_a = session_factory()
    session_b = session_factory()
    try:
        # Start the first claim but *don't* commit yet — its row is locked.
        first = await jobs_repo.claim_next_pending(session_a)
        # Second claimer runs while the first is still holding its lock. It
        # must see the 'a' row as pending but SKIP it, returning the 'b' row.
        second = await jobs_repo.claim_next_pending(session_b)
        assert first is not None and second is not None
        # Between them the two claims cover both rows with no overlap.
        assert {first.job_id, second.job_id} == {a.job_id, b.job_id}
        assert first.job_id != second.job_id
        await session_a.commit()
        await session_b.commit()
    finally:
        # Always release the sessions (and their row locks), even on failure.
        await session_a.close()
        await session_b.close()
async def test_mark_done_writes_response_and_finishes(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """mark_done with a clean response persists it and stamps finished_at."""
    async with session_factory() as session:
        created = await jobs_repo.insert_pending(
            session, _make_request(), callback_url=None
        )
        await session.commit()
    ok = ResponseIX(
        use_case="bank_statement_header",
        ix_client_id="mammon",
        request_id="r-1",
    )
    async with session_factory() as session:
        await jobs_repo.mark_done(session, created.job_id, ok)
        await session.commit()
    async with session_factory() as session:
        stored = await jobs_repo.get(session, created.job_id)
        assert stored is not None
        assert stored.status == "done"
        assert stored.response is not None
        assert stored.finished_at is not None
async def test_mark_done_with_error_response_moves_to_error(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """`done` iff response.error is None — otherwise status='error'."""
    async with session_factory() as session:
        created = await jobs_repo.insert_pending(
            session, _make_request(), callback_url=None
        )
        await session.commit()
    failure = ResponseIX(error="IX_002_000: boom")
    async with session_factory() as session:
        await jobs_repo.mark_done(session, created.job_id, failure)
        await session.commit()
    async with session_factory() as session:
        stored = await jobs_repo.get(session, created.job_id)
        assert stored is not None
        assert stored.status == "error"
        assert stored.response is not None
        assert (stored.response.error or "").startswith("IX_002_000")
async def test_mark_error_always_error(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """mark_error unconditionally lands the job in status='error'."""
    async with session_factory() as session:
        created = await jobs_repo.insert_pending(
            session, _make_request(), callback_url=None
        )
        await session.commit()
    failure = ResponseIX(error="IX_000_005: unsupported")
    async with session_factory() as session:
        await jobs_repo.mark_error(session, created.job_id, failure)
        await session.commit()
    async with session_factory() as session:
        stored = await jobs_repo.get(session, created.job_id)
        assert stored is not None
        assert stored.status == "error"
async def test_update_callback_status(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """update_callback_status persists the new delivery state on the row."""
    async with session_factory() as session:
        created = await jobs_repo.insert_pending(
            session, _make_request(), callback_url="http://cb"
        )
        await session.commit()
    async with session_factory() as session:
        await jobs_repo.update_callback_status(session, created.job_id, "delivered")
        await session.commit()
    async with session_factory() as session:
        stored = await jobs_repo.get(session, created.job_id)
        assert stored is not None
        assert stored.callback_status == "delivered"
async def test_sweep_orphans_resets_stale_running(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """Running rows older than (now - max_running_seconds) go back to pending."""
    async with session_factory() as session:
        created = await jobs_repo.insert_pending(
            session, _make_request(), callback_url=None
        )
        await session.commit()
    # Backdate started_at by an hour to simulate a crashed worker mid-job.
    async with session_factory() as session:
        from sqlalchemy import text

        an_hour_ago = datetime.now(UTC) - timedelta(hours=1)
        await session.execute(
            text(
                "UPDATE ix_jobs SET status='running', started_at=:t "
                "WHERE job_id=:jid"
            ),
            {"t": an_hour_ago, "jid": created.job_id},
        )
        await session.commit()
    # With a 60 s budget the hour-old row must be rescued.
    async with session_factory() as session:
        rescued = await jobs_repo.sweep_orphans(
            session, datetime.now(UTC), max_running_seconds=60
        )
        await session.commit()
        assert created.job_id in rescued
    # The row is pending again and its attempt counter was bumped.
    async with session_factory() as session:
        stored = await jobs_repo.get(session, created.job_id)
        assert stored is not None
        assert stored.status == "pending"
        assert stored.attempts == 1
async def test_sweep_orphans_leaves_fresh_running_alone(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """A just-claimed row must not get reclaimed by the sweeper."""
    async with session_factory() as session:
        await jobs_repo.insert_pending(session, _make_request(), callback_url=None)
        await session.commit()
    async with session_factory() as session:
        running = await jobs_repo.claim_next_pending(session)
        await session.commit()
        assert running is not None
    # Even with a generous one-hour budget nothing qualifies — the row was
    # claimed moments ago.
    async with session_factory() as session:
        rescued = await jobs_repo.sweep_orphans(
            session, datetime.now(UTC), max_running_seconds=3600
        )
        await session.commit()
        assert rescued == []
    async with session_factory() as session:
        stored = await jobs_repo.get(session, running.job_id)
        assert stored is not None
        assert stored.status == "running"
async def test_queue_position_pending_only(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """Three pending rows in insertion order → positions 0, 1, 2; total 3.

    Each row is committed in its own transaction so the DB stamps a
    distinct ``created_at`` per row (``now()`` is transaction-stable).
    """
    job_ids: list[UUID] = []
    for suffix in ("qp-a", "qp-b", "qp-c"):
        async with session_factory() as session:
            job = await jobs_repo.insert_pending(
                session, _make_request("c", suffix), callback_url=None
            )
            await session.commit()
            job_ids.append(job.job_id)
    async with session_factory() as session:
        positions = [
            await jobs_repo.queue_position(session, jid) for jid in job_ids
        ]
        # All three active; total == 3, oldest at the head.
        assert positions == [(0, 3), (1, 3), (2, 3)]
async def test_queue_position_running_plus_pending(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """One running + two pending → running:(0,3), next:(1,3), last:(2,3)."""
    job_ids: list[UUID] = []
    for suffix in ("qp-r-1", "qp-r-2", "qp-r-3"):
        async with session_factory() as session:
            job = await jobs_repo.insert_pending(
                session, _make_request("c", suffix), callback_url=None
            )
            await session.commit()
            job_ids.append(job.job_id)
    # Claim the oldest → it transitions to running.
    async with session_factory() as session:
        running = await jobs_repo.claim_next_pending(session)
        await session.commit()
        assert running is not None
        assert running.job_id == job_ids[0]
    async with session_factory() as session:
        positions = [
            await jobs_repo.queue_position(session, jid) for jid in job_ids
        ]
        # Head reports 0 ahead (itself is the head); each pending row counts
        # the running row plus any older pendings ahead of it.
        assert positions == [(0, 3), (1, 3), (2, 3)]
async def test_queue_position_terminal_returns_zero_zero(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """Finished jobs have no queue position — always (0, 0)."""
    async with session_factory() as session:
        created = await jobs_repo.insert_pending(
            session, _make_request("c", "qp-term"), callback_url=None
        )
        await session.commit()
    done = ResponseIX(
        use_case="bank_statement_header",
        ix_client_id="c",
        request_id="qp-term",
    )
    async with session_factory() as session:
        await jobs_repo.mark_done(session, created.job_id, done)
        await session.commit()
    async with session_factory() as session:
        assert await jobs_repo.queue_position(session, created.job_id) == (0, 0)
async def test_queue_position_unknown_id_returns_zero_zero(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """An id that was never inserted also reports (0, 0)."""
    async with session_factory() as session:
        assert await jobs_repo.queue_position(session, uuid4()) == (0, 0)
async def test_concurrent_claim_never_double_dispatches(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """Spin a batch of concurrent claimers; every insert is claimed exactly once."""
    async with session_factory() as session:
        ids = []
        for i in range(5):
            job = await jobs_repo.insert_pending(
                session, _make_request("mass", f"r-{i}"), callback_url=None
            )
            ids.append(job.job_id)
        await session.commit()

    async def claim_one() -> UUID | None:
        # Each claimer gets its own session/transaction, mirroring real workers.
        async with session_factory() as session:
            claimed = await jobs_repo.claim_next_pending(session)
            await session.commit()
            return claimed.job_id if claimed else None

    # 10 claimers race over 5 rows — deliberately more claimers than work.
    results = await asyncio.gather(*(claim_one() for _ in range(10)))
    non_null = [r for r in results if r is not None]
    # Exactly the 5 inserted ids come back, each exactly once: no row is
    # double-dispatched and none is missed.
    assert sorted(non_null) == sorted(ids)
# ---------- list_recent ---------------------------------------------------
#
# The UI's ``/ui/jobs`` page needs a paginated, filterable view of recent
# jobs. We keep the contract intentionally small: list_recent returns
# ``(jobs, total)`` — ``total`` is the count after filters but before
# limit/offset — so the template can render "Showing N of M".
async def test_list_recent_empty_db(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """No rows at all → an empty page and a total of zero."""
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(session, limit=50, offset=0)
        assert page == []
        assert total == 0
async def test_list_recent_orders_newest_first(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """Listing returns rows in reverse insertion order (newest first)."""
    inserted: list[UUID] = []
    for i in range(3):
        # One commit per row so each gets a distinct created_at.
        async with session_factory() as session:
            job = await jobs_repo.insert_pending(
                session, _make_request("c", f"lr-{i}"), callback_url=None
            )
            await session.commit()
            inserted.append(job.job_id)
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(session, limit=50, offset=0)
        assert total == 3
        assert [job.job_id for job in page] == list(reversed(inserted))
async def test_list_recent_status_single_filter(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """A single status string narrows both the page and the total."""
    # Seed three pendings, then promote one of them to done.
    async with session_factory() as session:
        for i in range(3):
            await jobs_repo.insert_pending(
                session, _make_request("c", f"sf-{i}"), callback_url=None
            )
        await session.commit()
    async with session_factory() as session:
        claimed = await jobs_repo.claim_next_pending(session)
        assert claimed is not None
        await jobs_repo.mark_done(
            session,
            claimed.job_id,
            ResponseIX(
                use_case="bank_statement_header",
                ix_client_id="c",
                request_id=claimed.request_id,
            ),
        )
        await session.commit()
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(
            session, limit=50, offset=0, status="done"
        )
        assert total == 1
        assert len(page) == 1
        assert page[0].status == "done"
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(
            session, limit=50, offset=0, status="pending"
        )
        assert total == 2
        assert all(job.status == "pending" for job in page)
async def test_list_recent_status_iterable_filter(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """A list of statuses matches any of them (here: done OR error)."""
    # Seed four pendings; finish one successfully and fail another.
    async with session_factory() as session:
        for i in range(4):
            await jobs_repo.insert_pending(
                session, _make_request("c", f"if-{i}"), callback_url=None
            )
        await session.commit()
    async with session_factory() as session:
        finished = await jobs_repo.claim_next_pending(session)
        assert finished is not None
        await jobs_repo.mark_done(
            session,
            finished.job_id,
            ResponseIX(
                use_case="bank_statement_header",
                ix_client_id="c",
                request_id=finished.request_id,
            ),
        )
        await session.commit()
    async with session_factory() as session:
        failed = await jobs_repo.claim_next_pending(session)
        assert failed is not None
        await jobs_repo.mark_error(session, failed.job_id, ResponseIX(error="boom"))
        await session.commit()
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(
            session, limit=50, offset=0, status=["done", "error"]
        )
        assert total == 2
        assert {job.status for job in page} == {"done", "error"}
async def test_list_recent_client_id_filter(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """client_id narrows the listing to that client's jobs only."""
    async with session_factory() as session:
        for client, req in (("alpha", "a-1"), ("beta", "b-1"), ("alpha", "a-2")):
            await jobs_repo.insert_pending(
                session, _make_request(client, req), callback_url=None
            )
        await session.commit()
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(
            session, limit=50, offset=0, client_id="alpha"
        )
        assert total == 2
        assert all(job.client_id == "alpha" for job in page)
async def test_list_recent_pagination(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """limit/offset walk the newest-first ordering page by page."""
    inserted: list[UUID] = []
    for i in range(7):
        # One commit per row → strictly increasing created_at.
        async with session_factory() as session:
            job = await jobs_repo.insert_pending(
                session, _make_request("c", f"pg-{i}"), callback_url=None
            )
            await session.commit()
            inserted.append(job.job_id)
    newest_first = list(reversed(inserted))
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(session, limit=3, offset=0)
        assert total == 7
        # First page holds the three most recent inserts.
        assert [job.job_id for job in page] == newest_first[:3]
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(session, limit=3, offset=3)
        assert total == 7
        assert [job.job_id for job in page] == newest_first[3:6]
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(session, limit=3, offset=6)
        assert total == 7
        # Final page has the single remaining (oldest) row.
        assert [job.job_id for job in page] == [inserted[0]]
async def test_list_recent_caps_limit(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """limit is capped at 200 — asking for 9999 gets clamped."""
    # NOTE(review): on an empty table this only proves an oversized limit is
    # accepted without raising; the clamp value itself lives in the repo.
    async with session_factory() as session:
        page, total = await jobs_repo.list_recent(session, limit=9999, offset=0)
        assert total == 0
        assert page == []
async def test_list_recent_rejects_negative_offset(
    session_factory: async_sessionmaker[AsyncSession],
) -> None:
    """A negative offset is a caller bug — list_recent raises ValueError."""
    import pytest

    async with session_factory() as session:
        with pytest.raises(ValueError):
            await jobs_repo.list_recent(session, limit=50, offset=-1)