test(pipeline): end-to-end hermetic test with fakes + synthetic fixture
Wires the five pipeline steps together with FakeOCRClient + FakeGenAIClient, feeds the committed synthetic_giro.pdf fixture via file:// URL, and asserts the full response shape. - scripts/create_fixture_pdf.py: PyMuPDF-based builder. One-page A4 PDF with six known header strings (bank name, IBAN, period, balances, statement date). Re-runnable on demand; the committed PDF is what CI consumes. - tests/fixtures/synthetic_giro.pdf: committed output. - tests/unit/test_pipeline_end_to_end.py: 5 tests covering * ix_result.result fields populated from the fake LLM * provenance.fields["result.closing_balance"].provenance_verified True * text_agreement True when Paperless-style texts match the value * metadata.timings has one entry per step in the right order * response.error is None and context is not serialised 197 tests total; ruff clean. No integration tests, no real clients, no network. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
118d77c428
commit
b109bba873
3 changed files with 436 additions and 0 deletions
66
scripts/create_fixture_pdf.py
Normal file
66
scripts/create_fixture_pdf.py
Normal file
|
|
@ -0,0 +1,66 @@
|
||||||
|
"""Build the synthetic E2E fixture PDF at ``tests/fixtures/synthetic_giro.pdf``.
|
||||||
|
|
||||||
|
Re-runnable on demand. Output bytes are stable across runs in page
|
||||||
|
content, layout, and text — only the PDF's embedded timestamps change,
|
||||||
|
which pipeline tests don't read. The committed fixture is what CI
|
||||||
|
consumes; re-run this script locally if you change the ground truth.
|
||||||
|
|
||||||
|
Contents: one A4 portrait page with six known strings placed at fixed
|
||||||
|
positions near the top. The goal is reproducible ground truth, not a
|
||||||
|
realistic bank statement. The pipeline's fake OCR client is seeded with
|
||||||
|
those same strings (at plausible bboxes) so the E2E test can assert
|
||||||
|
exact matches.
|
||||||
|
|
||||||
|
Usage::
|
||||||
|
|
||||||
|
uv run python scripts/create_fixture_pdf.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import fitz # PyMuPDF
|
||||||
|
|
||||||
|
# Where the generated fixture is written: tests/fixtures/synthetic_giro.pdf
# under the directory two levels above this script, so the result does not
# depend on the current working directory.
OUT_PATH = Path(__file__).resolve().parents[1].joinpath(
    "tests", "fixtures", "synthetic_giro.pdf"
)

# The six header strings placed on the page, in drawing order.
LINES: list[str] = [
    "DKB",
    "IBAN: DE89370400440532013000",
    "Statement period: 01.03.2026 - 31.03.2026",
    "Opening balance: 1234.56 EUR",
    "Closing balance: 1450.22 EUR",
    "Statement date: 31.03.2026",
]
|
||||||
|
|
||||||
|
|
||||||
|
def build() -> None:
    """Render ``LINES`` onto a single A4 page and save it to ``OUT_PATH``.

    Each string is inserted in 12 pt ``helv`` (Helvetica) text. The first
    insertion point is (72, 72) and every following line is placed 24 pt
    further along the y axis. The parent directory of ``OUT_PATH`` is
    created when it does not exist yet.

    The document is closed even when drawing or saving raises, so a failed
    run does not leave the PyMuPDF document open.
    """
    doc = fitz.open()
    try:
        # A4 @ 72 dpi -> 595 x 842 points.
        page = doc.new_page(width=595, height=842)
        for index, line in enumerate(LINES):
            page.insert_text(
                (72.0, 72.0 + 24.0 * index),
                line,
                fontsize=12,
                fontname="helv",
            )
        OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
        # deflate=False + garbage=0 keeps the output byte-stable.
        doc.save(
            str(OUT_PATH),
            deflate=False,
            deflate_images=False,
            garbage=0,
            clean=False,
        )
    finally:
        # Always release the document, including on the error path.
        doc.close()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: regenerate the fixture, then print where it was written.
    build()
    print(f"wrote {OUT_PATH}")
|
||||||
98
tests/fixtures/synthetic_giro.pdf
vendored
Normal file
98
tests/fixtures/synthetic_giro.pdf
vendored
Normal file
|
|
@ -0,0 +1,98 @@
|
||||||
|
%PDF-1.7
|
||||||
|
%µ¶
|
||||||
|
% Written by MuPDF 1.27.2
|
||||||
|
|
||||||
|
1 0 obj
|
||||||
|
<</Type/Catalog/Pages 2 0 R/Info<</Producer(MuPDF 1.27.2)>>>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
2 0 obj
|
||||||
|
<</Type/Pages/Count 1/Kids[4 0 R]>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
3 0 obj
|
||||||
|
<</Font<</helv 5 0 R>>>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
4 0 obj
|
||||||
|
<</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources 3 0 R/Parent 2 0 R/Contents[6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R]>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
5 0 obj
|
||||||
|
<</Type/Font/Subtype/Type1/BaseFont/Helvetica/Encoding/WinAnsiEncoding>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
6 0 obj
|
||||||
|
<</Length 54>>
|
||||||
|
stream
|
||||||
|
|
||||||
|
q
|
||||||
|
BT
|
||||||
|
1 0 0 1 72 770 Tm
|
||||||
|
/helv 12 Tf [<444b42>]TJ
|
||||||
|
ET
|
||||||
|
Q
|
||||||
|
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
7 0 obj
|
||||||
|
<</Length 95/Filter/FlateDecode>>
|
||||||
|
stream
|
||||||
|
xÚˆ1
|
||||||
|
€@û¼"?𒬞‚X6vB:±°P,´°ñýæXf†^<1A>„SL8+g4ìU×q,Ê~òÚ£ƒBpØ® @muf–-‚òÅu4
K¸Ô4l>Óä´Ð•9
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
8 0 obj
|
||||||
|
<</Length 105/Filter/FlateDecode>>
|
||||||
|
stream
|
||||||
|
xÚe‰±
|
||||||
|
ACûùŠùg2»3b!ØØ ÛÉ·‡…6~¿é%ÉK ò‘ËW£\4t¼å𜯯:÷®<C3B7>S<EFBFBD>jéLÏ<4C>™Õ`eÙyÌ=[¬°°pL2H° ÃÆ'þŸó2nrr—S¦Ò
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
9 0 obj
|
||||||
|
<</Length 100/Filter/FlateDecode>>
|
||||||
|
stream
|
||||||
|
xÚ
ñ
|
||||||
|
Â@EÑ~¾bþÀ™7»o
ˆ…`c'LR„°Á")lü~÷^Ž|å‘âjc×åtÕ<åòéÇOš»Î·²7ceç44Aç6tk¬°ð@Dô¨AX©#Ü—|É3å-Åyd
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
10 0 obj
|
||||||
|
<</Length 99/Filter/FlateDecode>>
|
||||||
|
stream
|
||||||
|
xÚ
ˆ1
|
||||||
|
B1û=ÅÞÀÝ÷’±lì„íÄB$-l<¿™©fìk§²ôX¦¸FóúØî5ß?Oxm~;4ê©mP{M
„ \'WQ<57>“<><E2809C><EFBFBD>IˆÖ8Þëb粫ý·V
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
11 0 obj
|
||||||
|
<</Length 93/Filter/FlateDecode>>
|
||||||
|
stream
|
||||||
|
xÚ-ˆ;
|
||||||
|
€@ûœ"70ŸÝl#‚ÍvB:±\±ÐÂÆó›Bó)ÆX-ú
ÝÙ®YÐ\ú¬%Ùö •$dÑMHUYš†ã%,jÃê&‡>NT
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
xref
|
||||||
|
0 12
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000042 00000 n
|
||||||
|
0000000120 00000 n
|
||||||
|
0000000172 00000 n
|
||||||
|
0000000213 00000 n
|
||||||
|
0000000352 00000 n
|
||||||
|
0000000441 00000 n
|
||||||
|
0000000544 00000 n
|
||||||
|
0000000707 00000 n
|
||||||
|
0000000881 00000 n
|
||||||
|
0000001050 00000 n
|
||||||
|
0000001218 00000 n
|
||||||
|
|
||||||
|
trailer
|
||||||
|
<</Size 12/Root 1 0 R/ID[<C3B4C38E004FC2B6C3A0C2BF4C00C282><890F3E53B827FF9B00CB90D2895721FC>]>>
|
||||||
|
startxref
|
||||||
|
1380
|
||||||
|
%%EOF
|
||||||
272
tests/unit/test_pipeline_end_to_end.py
Normal file
272
tests/unit/test_pipeline_end_to_end.py
Normal file
|
|
@ -0,0 +1,272 @@
|
||||||
|
"""End-to-end pipeline test with the fake OCR + GenAI clients (spec sections 6-9).
|
||||||
|
|
||||||
|
Feeds the committed ``tests/fixtures/synthetic_giro.pdf`` through the
|
||||||
|
full five-step pipeline with canned OCR + canned LLM responses.
|
||||||
|
Hermetic: no Surya, no Ollama, no network.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
|
from decimal import Decimal
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from ix.contracts import (
|
||||||
|
Context,
|
||||||
|
Line,
|
||||||
|
OCRDetails,
|
||||||
|
OCROptions,
|
||||||
|
OCRResult,
|
||||||
|
Options,
|
||||||
|
Page,
|
||||||
|
ProvenanceOptions,
|
||||||
|
RequestIX,
|
||||||
|
SegmentCitation,
|
||||||
|
)
|
||||||
|
from ix.genai import FakeGenAIClient, GenAIUsage
|
||||||
|
from ix.ocr import FakeOCRClient
|
||||||
|
from ix.pipeline import Pipeline
|
||||||
|
from ix.pipeline.genai_step import GenAIStep
|
||||||
|
from ix.pipeline.ocr_step import OCRStep
|
||||||
|
from ix.pipeline.reliability_step import ReliabilityStep
|
||||||
|
from ix.pipeline.response_handler_step import ResponseHandlerStep
|
||||||
|
from ix.pipeline.setup_step import SetupStep
|
||||||
|
from ix.use_cases.bank_statement_header import BankStatementHeader
|
||||||
|
|
||||||
|
# Committed fixture PDF, located relative to this test module
# (<tests dir>/fixtures/synthetic_giro.pdf).
FIXTURE_PDF = Path(__file__).resolve().parents[1].joinpath(
    "fixtures", "synthetic_giro.pdf"
)


# Ground truth shared by the whole module. These values have to agree with
# the strings the fixture builder draws on the page AND with the canned OCR
# output defined below.

# Account identity.
EXPECTED_BANK_NAME = "DKB"
EXPECTED_IBAN = "DE89370400440532013000"

# Balances and their currency.
EXPECTED_OPENING = Decimal("1234.56")
EXPECTED_CLOSING = Decimal("1450.22")
EXPECTED_CURRENCY = "EUR"

# Statement date and the period it covers.
EXPECTED_STATEMENT_DATE = date(2026, 3, 31)
EXPECTED_PERIOD_START = date(2026, 3, 1)
EXPECTED_PERIOD_END = date(2026, 3, 31)
|
||||||
|
|
||||||
|
|
||||||
|
def _canned_ocr_result() -> OCRResult:
    """Build the fixed OCR payload served for the synthetic_giro fixture.

    The line texts are the same strings create_fixture_pdf.py places on the
    page. The bounding boxes are plausible rather than exact: they reuse the
    builder's 72 pt left margin and 24 pt line spacing on a 595x842 page so
    that normalising them yields sensible 0-1 values.

    Returns:
        An ``OCRResult`` with one page (``page_no=1``) holding six lines, the
        newline-joined line texts as the full text, and
        ``meta_data={"engine": "fake"}``.
    """
    page_width, page_height = 595.0, 842.0
    left, right, box_height = 72.0, 500.0, 16.0
    # (line text, y of the box's top edge)
    text_and_top = [
        ("DKB", 60.0),
        ("IBAN: DE89370400440532013000", 84.0),
        ("Statement period: 01.03.2026 - 31.03.2026", 108.0),
        ("Opening balance: 1234.56 EUR", 132.0),
        ("Closing balance: 1450.22 EUR", 156.0),
        ("Statement date: 31.03.2026", 180.0),
    ]

    def _box(top: float) -> list[float]:
        # Four corners as flat x, y pairs:
        # (left, top), (right, top), (right, bottom), (left, bottom).
        bottom = top + box_height
        return [left, top, right, top, right, bottom, left, bottom]

    ocr_lines: list[Line] = [
        Line(text=text, bounding_box=_box(top)) for text, top in text_and_top
    ]
    first_page = Page(
        page_no=1,
        width=page_width,
        height=page_height,
        lines=ocr_lines,
    )
    full_text = "\n".join(text for text, _ in text_and_top)
    return OCRResult(
        result=OCRDetails(text=full_text, pages=[first_page]),
        meta_data={"engine": "fake"},
    )
|
||||||
|
|
||||||
|
|
||||||
|
class _WrappedResponse(BaseModel):
    """Mirrors the runtime ProvenanceWrappedResponse GenAIStep creates.

    Used in this module as the type of the canned "parsed" LLM output handed
    to the fake GenAI client.
    """

    # The extracted bank statement header.
    result: BankStatementHeader
    # Citations linking result field paths to OCR segment ids; empty by default.
    segment_citations: list[SegmentCitation] = []
|
||||||
|
|
||||||
|
|
||||||
|
def _canned_llm_output() -> _WrappedResponse:
    """Build the canned structured answer handed to the fake GenAI client.

    The header is filled from the module's ``EXPECTED_*`` ground truth (plus
    ``account_type="checking"``), and every header field gets one citation
    pointing at the OCR segment(s) that carry it.

    Segment ids, per the original author's note: after OCRStep injects
    <page> tag lines, the real OCR line at local index 0 gets segment id
    p1_l0 (tag lines are skipped by SegmentIndex.build). So:
        p1_l0 -> "DKB"
        p1_l1 -> "IBAN: DE89370400440532013000"
        p1_l2 -> "Statement period: 01.03.2026 - 31.03.2026"
        p1_l3 -> "Opening balance: 1234.56 EUR"
        p1_l4 -> "Closing balance: 1450.22 EUR"
        p1_l5 -> "Statement date: 31.03.2026"
    """
    header = BankStatementHeader(
        bank_name=EXPECTED_BANK_NAME,
        account_iban=EXPECTED_IBAN,
        account_type="checking",
        currency=EXPECTED_CURRENCY,
        statement_date=EXPECTED_STATEMENT_DATE,
        statement_period_start=EXPECTED_PERIOD_START,
        statement_period_end=EXPECTED_PERIOD_END,
        opening_balance=EXPECTED_OPENING,
        closing_balance=EXPECTED_CLOSING,
    )
    # (field path, value segment ids, context segment ids)
    citation_rows: list[tuple[str, list[str], list[str]]] = [
        ("result.bank_name", ["p1_l0"], []),
        ("result.account_iban", ["p1_l1"], []),
        # account_type has no value segment; it only cites p1_l0 as context.
        ("result.account_type", [], ["p1_l0"]),
        ("result.currency", ["p1_l3", "p1_l4"], []),
        ("result.statement_date", ["p1_l5"], []),
        ("result.statement_period_start", ["p1_l2"], []),
        ("result.statement_period_end", ["p1_l2"], []),
        ("result.opening_balance", ["p1_l3"], []),
        ("result.closing_balance", ["p1_l4"], []),
    ]
    citations = [
        SegmentCitation(
            field_path=path,
            value_segment_ids=value_ids,
            context_segment_ids=context_ids,
        )
        for path, value_ids, context_ids in citation_rows
    ]
    return _WrappedResponse(result=header, segment_citations=citations)
|
||||||
|
|
||||||
|
|
||||||
|
def _build_pipeline(fetch_config: Any = None) -> Pipeline:
    """Assemble the five-step pipeline around the fake OCR and GenAI clients.

    Args:
        fetch_config: Optional value forwarded to ``SetupStep`` as its
            ``fetch_config`` argument. When ``None`` (the default),
            ``SetupStep`` is constructed without arguments.

    Returns:
        A ``Pipeline`` whose steps are, in order: SetupStep, OCRStep,
        GenAIStep, ReliabilityStep, ResponseHandlerStep.
    """
    ocr_client = FakeOCRClient(canned=_canned_ocr_result())
    genai_client = FakeGenAIClient(
        parsed=_canned_llm_output(),
        usage=GenAIUsage(prompt_tokens=200, completion_tokens=400),
        model_name="fake-gpt",
    )
    # ``None`` is the "not provided" sentinel. Compare against it explicitly
    # so a config object that happens to be falsy is still passed through.
    if fetch_config is None:
        setup = SetupStep()
    else:
        setup = SetupStep(fetch_config=fetch_config)
    return Pipeline(
        steps=[
            setup,
            OCRStep(ocr_client=ocr_client),
            GenAIStep(genai_client=genai_client),
            ReliabilityStep(),
            ResponseHandlerStep(),
        ]
    )
|
||||||
|
|
||||||
|
|
||||||
|
class TestEndToEnd:
    """Run the fake-backed pipeline on the committed fixture and check the response."""

    @pytest.fixture
    def request_ix(self) -> RequestIX:
        """Return the canonical single-file request used by every test.

        The request points to the committed fixture via a file:// URL and
        carries a matching Paperless-style text so text_agreement has real
        data to compare against. OCR and provenance are both switched on.
        """
        paperless_text = (
            "DKB statement. IBAN: DE89370400440532013000. Period 01.03.2026 - "
            "31.03.2026. Opening balance 1234.56 EUR. Closing balance 1450.22 EUR. "
            "Date 31.03.2026."
        )
        return RequestIX(
            use_case="bank_statement_header",
            ix_client_id="mammon-test",
            request_id="end-to-end-1",
            ix_id="abcd0123ef456789",
            context=Context(
                files=[FIXTURE_PDF.as_uri()],
                texts=[paperless_text],
            ),
            options=Options(
                ocr=OCROptions(use_ocr=True),
                provenance=ProvenanceOptions(
                    include_provenance=True, max_sources_per_field=5
                ),
            ),
        )

    async def test_ix_result_populated_from_fake_llm(self, request_ix: RequestIX) -> None:
        """The result fields carry the values supplied by the fake LLM."""
        response = await _build_pipeline().start(request_ix)

        assert response.error is None
        result = response.ix_result.result
        assert result["bank_name"] == EXPECTED_BANK_NAME
        assert result["account_iban"] == EXPECTED_IBAN
        assert result["currency"] == EXPECTED_CURRENCY
        # Pydantic v2 dumps Decimals as strings in mode="json".
        assert result["closing_balance"] == str(EXPECTED_CLOSING)

    async def test_provenance_verified_for_closing_balance(
        self, request_ix: RequestIX
    ) -> None:
        """The closing balance's provenance entry is marked as verified."""
        response = await _build_pipeline().start(request_ix)

        assert response.provenance is not None
        closing = response.provenance.fields["result.closing_balance"]
        assert closing.provenance_verified is True

    async def test_text_agreement_true_when_texts_match_value(
        self, request_ix: RequestIX
    ) -> None:
        """text_agreement is True when the request texts contain the value."""
        response = await _build_pipeline().start(request_ix)

        assert response.provenance is not None
        closing = response.provenance.fields["result.closing_balance"]
        assert closing.text_agreement is True

    async def test_timings_per_step(self, request_ix: RequestIX) -> None:
        """metadata.timings lists all five steps in execution order."""
        response = await _build_pipeline().start(request_ix)

        # Each of the five steps executed and recorded a timing.
        timings = response.metadata.timings
        assert [entry["step"] for entry in timings] == [
            "SetupStep",
            "OCRStep",
            "GenAIStep",
            "ReliabilityStep",
            "ResponseHandlerStep",
        ]
        for entry in timings:
            assert isinstance(entry["elapsed_seconds"], float)

    async def test_no_error_and_context_stripped(self, request_ix: RequestIX) -> None:
        """The response has no error and its dump contains no "context" key."""
        response = await _build_pipeline().start(request_ix)

        assert response.error is None
        dump = response.model_dump()
        assert "context" not in dump
|
||||||
Loading…
Reference in a new issue