Compare commits

...

2 commits

Author SHA1 Message Date
a54a968313 Merge pull request 'test(pipeline): end-to-end hermetic test with fakes + synthetic fixture' (#17) from feat/pipeline-e2e-fakes into main
Some checks failed
tests / test (push) Has been cancelled
2026-04-18 09:24:51 +00:00
b109bba873 test(pipeline): end-to-end hermetic test with fakes + synthetic fixture
All checks were successful
tests / test (push) Successful in 59s
tests / test (pull_request) Successful in 57s
Wires the five pipeline steps together with FakeOCRClient +
FakeGenAIClient, feeds the committed synthetic_giro.pdf fixture via
file:// URL, and asserts the full response shape.

- scripts/create_fixture_pdf.py: PyMuPDF-based builder. One-page A4 PDF
  with six known header strings (bank name, IBAN, period, balances,
  statement date). Re-runnable on demand; the committed PDF is what CI
  consumes.
- tests/fixtures/synthetic_giro.pdf: committed output.
- tests/unit/test_pipeline_end_to_end.py: 5 tests covering
  * ix_result.result fields populated from the fake LLM
  * provenance.fields["result.closing_balance"].provenance_verified True
  * text_agreement True when Paperless-style texts match the value
  * metadata.timings has one entry per step in the right order
  * response.error is None and context is not serialised

197 tests total; ruff clean. No integration tests, no real clients,
no network.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 11:24:29 +02:00
3 changed files with 436 additions and 0 deletions

View file

@ -0,0 +1,66 @@
"""Build the synthetic E2E fixture PDF at ``tests/fixtures/synthetic_giro.pdf``.
Re-runnable on demand. Output bytes are stable across runs in page
content, layout, and text; only the PDF's embedded timestamps change,
which pipeline tests don't read. The committed fixture is what CI
consumes; re-run this script locally if you change the ground truth.
Contents: one A4 portrait page with six known strings placed at fixed
positions near the top. The goal is reproducible ground truth, not a
realistic bank statement. The pipeline's fake OCR client is seeded with
those same strings (at plausible bboxes) so the E2E test can assert
exact matches.
Usage::
uv run python scripts/create_fixture_pdf.py
"""
from __future__ import annotations
from pathlib import Path
import fitz # PyMuPDF
# Where the generated fixture is written: the parent of this script's
# directory, then tests/fixtures/synthetic_giro.pdf.
OUT_PATH = Path(__file__).resolve().parent.parent.joinpath(
    "tests", "fixtures", "synthetic_giro.pdf"
)

# Ground-truth header strings, in the order they are drawn down the page.
LINES: list[str] = [
    "DKB",
    "IBAN: DE89370400440532013000",
    "Statement period: 01.03.2026 - 31.03.2026",
    "Opening balance: 1234.56 EUR",
    "Closing balance: 1450.22 EUR",
    "Statement date: 31.03.2026",
]
def build() -> None:
    """Render ``LINES`` onto a one-page PDF and save it to ``OUT_PATH``.

    A new 595 x 842 pt page is created and each string in ``LINES`` is
    inserted with the ``helv`` font at 12 pt, starting at (72, 72) and
    moving 24 pt further down per line. The parent directory of
    ``OUT_PATH`` is created when missing.

    The document handle is closed even if page creation, text insertion
    or saving raises.
    """
    doc = fitz.open()
    try:
        # A4 @ 72 dpi -> 595 x 842 points.
        page = doc.new_page(width=595, height=842)
        y = 72.0
        for line in LINES:
            page.insert_text(
                (72.0, y),
                line,
                fontsize=12,
                fontname="helv",
            )
            y += 24.0
        OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
        # deflate=False + garbage=0 keeps the output byte-stable.
        doc.save(
            str(OUT_PATH),
            deflate=False,
            deflate_images=False,
            garbage=0,
            clean=False,
        )
    finally:
        # Release the document on failure too, so it is never leaked.
        doc.close()
# Script entry point: regenerate the fixture, then report the path written.
if __name__ == "__main__":
    build()
    print(f"wrote {OUT_PATH}")

98
tests/fixtures/synthetic_giro.pdf vendored Normal file
View file

@ -0,0 +1,98 @@
%PDF-1.7
%µ¶
% Written by MuPDF 1.27.2
1 0 obj
<</Type/Catalog/Pages 2 0 R/Info<</Producer(MuPDF 1.27.2)>>>>
endobj
2 0 obj
<</Type/Pages/Count 1/Kids[4 0 R]>>
endobj
3 0 obj
<</Font<</helv 5 0 R>>>>
endobj
4 0 obj
<</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources 3 0 R/Parent 2 0 R/Contents[6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R]>>
endobj
5 0 obj
<</Type/Font/Subtype/Type1/BaseFont/Helvetica/Encoding/WinAnsiEncoding>>
endobj
6 0 obj
<</Length 54>>
stream
q
BT
1 0 0 1 72 770 Tm
/helv 12 Tf [<444b42>]TJ
ET
Q
endstream
endobj
7 0 obj
<</Length 95/Filter/FlateDecode>>
stream
ˆ1
€@ û¼"?ð¬žX6vB:±°P,´°ñýæXf†^<1A>„SL8+g4ìU×q,Ê~òÚ£ƒBpØ® @m­uf-òÅu4 K¸Ô4l>Óä´Ð9
endstream
endobj
8 0 obj
<</Length 105/Filter/FlateDecode>>
stream
xÚe‰±
ACûùŠùg2»3b!ØØ ÛÉ·‡…6~¿é%ÉK ò‘ËW£\ 4t¼å𜯯:÷®<C3B7>S<EFBFBD>jéLÏ<4C>™Õ`eÙ yÌ=[¬°°pL2H° ÃÆ'þŸó2nrr—Ò
endstream
endobj
9 0 obj
<</Length 100/Filter/FlateDecode>>
stream
xÚ Ã±
Â@EÑ~¾bþÀ™7»o ˆ…`c'LR„°Á")lü~÷^Ž|åâjc×åtÕ<åòéÇOš»Î·²7ceç44Aç6tk¬°ð@Dô¨AX©#Ü—|É3å-Åyd
endstream
endobj
10 0 obj
<</Length 99/Filter/FlateDecode>>
stream
ˆ1
B1û=ÅÞÀÝ÷’±lì„íÄB$ -l<¿™©fìk§²ôX¦¸FóúØî5ß?Oxm~;4ê©mP{M „ \'WQ<57><><E2809C><EFBFBD>IˆÖ8Þëb粫ý·V
endstream
endobj
11 0 obj
<</Length 93/Filter/FlateDecode>>
stream
xÚ-ˆ;
€@ ûœ"70ŸÝl#ÍvB:±\±ÐÂÆóBó)ÆX-ú ÝÙ®\ú¬%Ùö •$dÑMHUYš†ã%,jÃê&‡>NT
endstream
endobj
xref
0 12
0000000000 65535 f
0000000042 00000 n
0000000120 00000 n
0000000172 00000 n
0000000213 00000 n
0000000352 00000 n
0000000441 00000 n
0000000544 00000 n
0000000707 00000 n
0000000881 00000 n
0000001050 00000 n
0000001218 00000 n
trailer
<</Size 12/Root 1 0 R/ID[<C3B4C38E004FC2B6C3A0C2BF4C00C282><890F3E53B827FF9B00CB90D2895721FC>]>>
startxref
1380
%%EOF

View file

@ -0,0 +1,272 @@
"""End-to-end pipeline test with the fake OCR + GenAI clients (spec sections 6-9).
Feeds the committed ``tests/fixtures/synthetic_giro.pdf`` through the
full five-step pipeline with canned OCR + canned LLM responses.
Hermetic: no Surya, no Ollama, no network.
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from pathlib import Path
from typing import Any
import pytest
from pydantic import BaseModel
from ix.contracts import (
Context,
Line,
OCRDetails,
OCROptions,
OCRResult,
Options,
Page,
ProvenanceOptions,
RequestIX,
SegmentCitation,
)
from ix.genai import FakeGenAIClient, GenAIUsage
from ix.ocr import FakeOCRClient
from ix.pipeline import Pipeline
from ix.pipeline.genai_step import GenAIStep
from ix.pipeline.ocr_step import OCRStep
from ix.pipeline.reliability_step import ReliabilityStep
from ix.pipeline.response_handler_step import ResponseHandlerStep
from ix.pipeline.setup_step import SetupStep
from ix.use_cases.bank_statement_header import BankStatementHeader
# Committed fixture PDF: lives in the ``fixtures`` directory that sits next
# to this module's own directory.
FIXTURE_PDF = Path(__file__).resolve().parent.parent.joinpath(
    "fixtures", "synthetic_giro.pdf"
)

# Ground-truth values. Must match the strings the fixture builder drops on
# the page AND the canned OCR output below.

# Account identity.
EXPECTED_BANK_NAME = "DKB"
EXPECTED_IBAN = "DE89370400440532013000"
EXPECTED_CURRENCY = "EUR"

# Balances.
EXPECTED_OPENING = Decimal("1234.56")
EXPECTED_CLOSING = Decimal("1450.22")

# Dates (the fixture prints them as DD.MM.YYYY).
EXPECTED_PERIOD_START = date.fromisoformat("2026-03-01")
EXPECTED_PERIOD_END = date.fromisoformat("2026-03-31")
EXPECTED_STATEMENT_DATE = date.fromisoformat("2026-03-31")
def _canned_ocr_result() -> OCRResult:
    """Return the fixed OCR payload served by the fake OCR client.

    The payload describes a single 595x842 page carrying one ``Line`` per
    string that create_fixture_pdf.py places on the fixture. Every box
    spans x = 72..500 and is 16 units tall; the top edges advance by 24
    from 60 to 180. These coordinates echo the builder's 72 pt left margin
    and 24 pt line pitch so normalisation yields sensible 0-1 values, but
    they are plausible rather than measured from the PDF.
    """
    page_width, page_height = 595.0, 842.0
    left, right, box_height = 72.0, 500.0, 16.0

    # (line text, y of the box's top edge)
    texts_and_tops = [
        ("DKB", 60.0),
        ("IBAN: DE89370400440532013000", 84.0),
        ("Statement period: 01.03.2026 - 31.03.2026", 108.0),
        ("Opening balance: 1234.56 EUR", 132.0),
        ("Closing balance: 1450.22 EUR", 156.0),
        ("Statement date: 31.03.2026", 180.0),
    ]

    def _box(top: float) -> list[float]:
        # Flat list of four corner points:
        # (left, top), (right, top), (right, bottom), (left, bottom).
        bottom = top + box_height
        return [left, top, right, top, right, bottom, left, bottom]

    ocr_lines: list[Line] = [
        Line(text=text, bounding_box=_box(top)) for text, top in texts_and_tops
    ]
    full_text = "\n".join(text for text, _ in texts_and_tops)
    only_page = Page(
        page_no=1,
        width=page_width,
        height=page_height,
        lines=ocr_lines,
    )
    return OCRResult(
        result=OCRDetails(text=full_text, pages=[only_page]),
        meta_data={"engine": "fake"},
    )
class _WrappedResponse(BaseModel):
    """Mirrors the runtime ProvenanceWrappedResponse GenAIStep creates.

    In this module it is the type of the canned ``parsed`` payload handed
    to ``FakeGenAIClient``.
    """

    # Extracted header fields for the bank_statement_header use case.
    result: BankStatementHeader
    # Per-field citations referencing OCR segment ids; empty when omitted.
    # NOTE(review): the bare ``[]`` default relies on pydantic copying
    # mutable field defaults per instance -- confirm for the pinned version.
    segment_citations: list[SegmentCitation] = []
def _canned_llm_output() -> _WrappedResponse:
    """Return the fixed structured answer the fake LLM gives for the fixture.

    The ``result`` carries the module's ``EXPECTED_*`` ground-truth values
    (plus ``account_type="checking"``), and there is one ``SegmentCitation``
    per result field pointing at the OCR segment(s) that contain it.
    """
    # After OCRStep injects <page> tag lines, the real OCR line at local
    # index 0 gets segment id p1_l0 (tag lines are skipped by
    # SegmentIndex.build). So:
    #   p1_l0 -> "DKB"
    #   p1_l1 -> "IBAN: DE89370400440532013000"
    #   p1_l2 -> "Statement period: 01.03.2026 - 31.03.2026"
    #   p1_l3 -> "Opening balance: 1234.56 EUR"
    #   p1_l4 -> "Closing balance: 1450.22 EUR"
    #   p1_l5 -> "Statement date: 31.03.2026"
    header = BankStatementHeader(
        bank_name=EXPECTED_BANK_NAME,
        account_iban=EXPECTED_IBAN,
        account_type="checking",
        currency=EXPECTED_CURRENCY,
        statement_date=EXPECTED_STATEMENT_DATE,
        statement_period_start=EXPECTED_PERIOD_START,
        statement_period_end=EXPECTED_PERIOD_END,
        opening_balance=EXPECTED_OPENING,
        closing_balance=EXPECTED_CLOSING,
    )

    # (field_path, value_segment_ids, context_segment_ids)
    citation_rows = [
        ("result.bank_name", ["p1_l0"], []),
        ("result.account_iban", ["p1_l1"], []),
        # account_type is not printed on the page; it only has a context segment.
        ("result.account_type", [], ["p1_l0"]),
        ("result.currency", ["p1_l3", "p1_l4"], []),
        ("result.statement_date", ["p1_l5"], []),
        ("result.statement_period_start", ["p1_l2"], []),
        ("result.statement_period_end", ["p1_l2"], []),
        ("result.opening_balance", ["p1_l3"], []),
        ("result.closing_balance", ["p1_l4"], []),
    ]
    citations = [
        SegmentCitation(
            field_path=path,
            value_segment_ids=value_ids,
            context_segment_ids=context_ids,
        )
        for path, value_ids, context_ids in citation_rows
    ]
    return _WrappedResponse(result=header, segment_citations=citations)
def _build_pipeline(fetch_config: Any = None) -> Pipeline:
    """Assemble the five-step pipeline wired to the fake OCR and GenAI clients.

    Args:
        fetch_config: Optional configuration forwarded to ``SetupStep`` as
            its ``fetch_config`` argument. When ``None`` (the default),
            ``SetupStep`` is constructed without arguments.

    Returns:
        A ``Pipeline`` whose steps are SetupStep, OCRStep, GenAIStep,
        ReliabilityStep and ResponseHandlerStep, in that order.
    """
    ocr_client = FakeOCRClient(canned=_canned_ocr_result())
    genai_client = FakeGenAIClient(
        parsed=_canned_llm_output(),
        usage=GenAIUsage(prompt_tokens=200, completion_tokens=400),
        model_name="fake-gpt",
    )
    # Test for None explicitly: a config object that happens to be falsy
    # must still reach SetupStep rather than being silently replaced by
    # the defaults.
    if fetch_config is None:
        setup = SetupStep()
    else:
        setup = SetupStep(fetch_config=fetch_config)
    return Pipeline(
        steps=[
            setup,
            OCRStep(ocr_client=ocr_client),
            GenAIStep(genai_client=genai_client),
            ReliabilityStep(),
            ResponseHandlerStep(),
        ]
    )
class TestEndToEnd:
    """Run the full pipeline on the committed fixture using fake clients.

    Every test builds a fresh pipeline via ``_build_pipeline()`` and feeds
    it the same ``request_ix`` fixture.

    NOTE(review): the async tests carry no explicit asyncio marker;
    presumably the project runs pytest-asyncio in auto mode -- confirm.
    """

    @pytest.fixture
    def request_ix(self) -> RequestIX:
        """Build the single-file request shared by all tests in this class."""
        # Canonical single-file request pointing to the committed fixture
        # via file:// URL. Also includes a matching Paperless-style text
        # so text_agreement has real data to compare against.
        paperless_text = (
            "DKB statement. IBAN: DE89370400440532013000. Period 01.03.2026 - "
            "31.03.2026. Opening balance 1234.56 EUR. Closing balance 1450.22 EUR. "
            "Date 31.03.2026."
        )
        return RequestIX(
            use_case="bank_statement_header",
            ix_client_id="mammon-test",
            request_id="end-to-end-1",
            ix_id="abcd0123ef456789",
            context=Context(
                files=[FIXTURE_PDF.as_uri()],
                texts=[paperless_text],
            ),
            options=Options(
                ocr=OCROptions(use_ocr=True),
                provenance=ProvenanceOptions(
                    include_provenance=True, max_sources_per_field=5
                ),
            ),
        )

    async def test_ix_result_populated_from_fake_llm(self, request_ix: RequestIX) -> None:
        """The result fields equal the values supplied by the fake LLM."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)

        assert response.error is None
        result = response.ix_result.result
        assert result["bank_name"] == EXPECTED_BANK_NAME
        assert result["account_iban"] == EXPECTED_IBAN
        assert result["currency"] == EXPECTED_CURRENCY
        # Pydantic v2 dumps Decimals as strings in mode="json".
        assert result["closing_balance"] == str(EXPECTED_CLOSING)

    async def test_provenance_verified_for_closing_balance(
        self, request_ix: RequestIX
    ) -> None:
        """The closing balance's provenance entry is marked verified."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)

        assert response.provenance is not None
        fp = response.provenance.fields["result.closing_balance"]
        assert fp.provenance_verified is True

    async def test_text_agreement_true_when_texts_match_value(
        self, request_ix: RequestIX
    ) -> None:
        """text_agreement is True when the request texts contain the value."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)

        assert response.provenance is not None
        fp = response.provenance.fields["result.closing_balance"]
        assert fp.text_agreement is True

    async def test_timings_per_step(self, request_ix: RequestIX) -> None:
        """metadata.timings holds one float-valued entry per step, in order."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)

        # Each of the five steps executed and recorded a timing.
        names = [t["step"] for t in response.metadata.timings]
        assert names == [
            "SetupStep",
            "OCRStep",
            "GenAIStep",
            "ReliabilityStep",
            "ResponseHandlerStep",
        ]
        for entry in response.metadata.timings:
            assert isinstance(entry["elapsed_seconds"], float)

    async def test_no_error_and_context_stripped(self, request_ix: RequestIX) -> None:
        """The response has no error and its dump omits the request context."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)

        assert response.error is None
        dump = response.model_dump()
        assert "context" not in dump