From b109bba873c550a7c7adb54beeec583b1f8c0249 Mon Sep 17 00:00:00 2001 From: Dirk Riemann Date: Sat, 18 Apr 2026 11:24:29 +0200 Subject: [PATCH] test(pipeline): end-to-end hermetic test with fakes + synthetic fixture Wires the five pipeline steps together with FakeOCRClient + FakeGenAIClient, feeds the committed synthetic_giro.pdf fixture via file:// URL, and asserts the full response shape. - scripts/create_fixture_pdf.py: PyMuPDF-based builder. One-page A4 PDF with six known header strings (bank name, IBAN, period, balances, statement date). Re-runnable on demand; the committed PDF is what CI consumes. - tests/fixtures/synthetic_giro.pdf: committed output. - tests/unit/test_pipeline_end_to_end.py: 5 tests covering * ix_result.result fields populated from the fake LLM * provenance.fields["result.closing_balance"].provenance_verified True * text_agreement True when Paperless-style texts match the value * metadata.timings has one entry per step in the right order * response.error is None and context is not serialised 197 tests total; ruff clean. No integration tests, no real clients, no network. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/create_fixture_pdf.py | 66 ++++++ tests/fixtures/synthetic_giro.pdf | 98 +++++++++ tests/unit/test_pipeline_end_to_end.py | 272 +++++++++++++++++++++++++ 3 files changed, 436 insertions(+) create mode 100644 scripts/create_fixture_pdf.py create mode 100644 tests/fixtures/synthetic_giro.pdf create mode 100644 tests/unit/test_pipeline_end_to_end.py diff --git a/scripts/create_fixture_pdf.py b/scripts/create_fixture_pdf.py new file mode 100644 index 0000000..5f1ab65 --- /dev/null +++ b/scripts/create_fixture_pdf.py @@ -0,0 +1,66 @@ +"""Build the synthetic E2E fixture PDF at ``tests/fixtures/synthetic_giro.pdf``. + +Re-runnable on demand. Output bytes are stable across runs in page +content, layout, and text — only the PDF's embedded timestamps change, +which pipeline tests don't read. The committed fixture is what CI +consumes; re-run this script locally if you change the ground truth. + +Contents: one A4 portrait page with six known strings placed at fixed +positions near the top. The goal is reproducible ground truth, not a +realistic bank statement. The pipeline's fake OCR client is seeded with +those same strings (at plausible bboxes) so the E2E test can assert +exact matches. + +Usage:: + + uv run python scripts/create_fixture_pdf.py +""" + +from __future__ import annotations + +from pathlib import Path + +import fitz # PyMuPDF + +OUT_PATH = ( + Path(__file__).resolve().parent.parent / "tests" / "fixtures" / "synthetic_giro.pdf" +) + +LINES: list[str] = [ + "DKB", + "IBAN: DE89370400440532013000", + "Statement period: 01.03.2026 - 31.03.2026", + "Opening balance: 1234.56 EUR", + "Closing balance: 1450.22 EUR", + "Statement date: 31.03.2026", +] + + +def build() -> None: + doc = fitz.open() + # A4 @ 72 dpi -> 595 x 842 points. + page = doc.new_page(width=595, height=842) + y = 72.0 + for line in LINES: + page.insert_text( + (72.0, y), + line, + fontsize=12, + fontname="helv", + ) + y += 24.0 + OUT_PATH.parent.mkdir(parents=True, exist_ok=True) + # deflate=False + garbage=0 keeps the output byte-stable. + doc.save( + str(OUT_PATH), + deflate=False, + deflate_images=False, + garbage=0, + clean=False, + ) + doc.close() + + +if __name__ == "__main__": + build() + print(f"wrote {OUT_PATH}") diff --git a/tests/fixtures/synthetic_giro.pdf b/tests/fixtures/synthetic_giro.pdf new file mode 100644 index 0000000..05e5b20 --- /dev/null +++ b/tests/fixtures/synthetic_giro.pdf @@ -0,0 +1,98 @@ +%PDF-1.7 +%µ¶ +% Written by MuPDF 1.27.2 + +1 0 obj +<>>> +endobj + +2 0 obj +<> +endobj + +3 0 obj +<>>> +endobj + +4 0 obj +<> +endobj + +5 0 obj +<> +endobj + +6 0 obj +<> +stream + +q +BT +1 0 0 1 72 770 Tm +/helv 12 Tf [<444b42>]TJ +ET +Q + +endstream +endobj + +7 0 obj +<> +stream +x1 +@ "?𒬞X6vB:P,Xf^SL8+g4Uq,~ڣBpخ @muf-u4 K4l>9 +endstream +endobj + +8 0 obj +<> +stream +xe +ACg23b! 6~%K W\ 4t𜯯:SjLύ`e y=[pL2H '2nrrS +endstream +endobj + +9 0 obj +<> +stream +x ñ +@E~b7o `c'LR")l~^|jct> +stream +x 1 +B1=lB$ -l> +stream +x-; +@ "70l#vB:\B)X- ٮY\%$dMHUY%,j&>NT +endstream +endobj + +xref +0 12 +0000000000 65535 f +0000000042 00000 n +0000000120 00000 n +0000000172 00000 n +0000000213 00000 n +0000000352 00000 n +0000000441 00000 n +0000000544 00000 n +0000000707 00000 n +0000000881 00000 n +0000001050 00000 n +0000001218 00000 n + +trailer +<<890F3E53B827FF9B00CB90D2895721FC>]>> +startxref +1380 +%%EOF diff --git a/tests/unit/test_pipeline_end_to_end.py b/tests/unit/test_pipeline_end_to_end.py new file mode 100644 index 0000000..44834b2 --- /dev/null +++ b/tests/unit/test_pipeline_end_to_end.py @@ -0,0 +1,272 @@ +"""End-to-end pipeline test with the fake OCR + GenAI clients (spec sections 6-9). + +Feeds the committed ``tests/fixtures/synthetic_giro.pdf`` through the +full five-step pipeline with canned OCR + canned LLM responses. +Hermetic: no Surya, no Ollama, no network. +""" + +from __future__ import annotations + +from datetime import date +from decimal import Decimal +from pathlib import Path +from typing import Any + +import pytest +from pydantic import BaseModel + +from ix.contracts import ( + Context, + Line, + OCRDetails, + OCROptions, + OCRResult, + Options, + Page, + ProvenanceOptions, + RequestIX, + SegmentCitation, +) +from ix.genai import FakeGenAIClient, GenAIUsage +from ix.ocr import FakeOCRClient +from ix.pipeline import Pipeline +from ix.pipeline.genai_step import GenAIStep +from ix.pipeline.ocr_step import OCRStep +from ix.pipeline.reliability_step import ReliabilityStep +from ix.pipeline.response_handler_step import ResponseHandlerStep +from ix.pipeline.setup_step import SetupStep +from ix.use_cases.bank_statement_header import BankStatementHeader + +FIXTURE_PDF = Path(__file__).resolve().parent.parent / "fixtures" / "synthetic_giro.pdf" + + +# Ground-truth values. Must match the strings the fixture builder drops on +# the page AND the canned OCR output below. +EXPECTED_BANK_NAME = "DKB" +EXPECTED_IBAN = "DE89370400440532013000" +EXPECTED_OPENING = Decimal("1234.56") +EXPECTED_CLOSING = Decimal("1450.22") +EXPECTED_CURRENCY = "EUR" +EXPECTED_STATEMENT_DATE = date(2026, 3, 31) +EXPECTED_PERIOD_START = date(2026, 3, 1) +EXPECTED_PERIOD_END = date(2026, 3, 31) + + +def _canned_ocr_result() -> OCRResult: + """Canned Surya-shaped result for the synthetic_giro fixture. + + Line texts match the strings placed by create_fixture_pdf.py. Bboxes + are plausible-but-not-exact: the fixture builder uses 72 pt left + margin and 24 pt line height on a 595x842 page, so we mirror those + coords here so normalisation gives sensible 0-1 values. + """ + width, height = 595.0, 842.0 + lines_meta = [ + ("DKB", 60.0), + ("IBAN: DE89370400440532013000", 84.0), + ("Statement period: 01.03.2026 - 31.03.2026", 108.0), + ("Opening balance: 1234.56 EUR", 132.0), + ("Closing balance: 1450.22 EUR", 156.0), + ("Statement date: 31.03.2026", 180.0), + ] + lines: list[Line] = [] + for text, y_top in lines_meta: + y_bot = y_top + 16.0 + lines.append( + Line( + text=text, + bounding_box=[72.0, y_top, 500.0, y_top, 500.0, y_bot, 72.0, y_bot], + ) + ) + return OCRResult( + result=OCRDetails( + text="\n".join(t for t, _ in lines_meta), + pages=[ + Page( + page_no=1, + width=width, + height=height, + lines=lines, + ) + ], + ), + meta_data={"engine": "fake"}, + ) + + +class _WrappedResponse(BaseModel): + """Mirrors the runtime ProvenanceWrappedResponse GenAIStep creates.""" + + result: BankStatementHeader + segment_citations: list[SegmentCitation] = [] + + +def _canned_llm_output() -> _WrappedResponse: + # After OCRStep injects tag lines, the real OCR line at local + # index 0 gets segment id p1_l0 (tag lines are skipped by + # SegmentIndex.build). So: + # p1_l0 -> "DKB" + # p1_l1 -> "IBAN: DE89370400440532013000" + # p1_l2 -> "Statement period: 01.03.2026 - 31.03.2026" + # p1_l3 -> "Opening balance: 1234.56 EUR" + # p1_l4 -> "Closing balance: 1450.22 EUR" + # p1_l5 -> "Statement date: 31.03.2026" + return _WrappedResponse( + result=BankStatementHeader( + bank_name=EXPECTED_BANK_NAME, + account_iban=EXPECTED_IBAN, + account_type="checking", + currency=EXPECTED_CURRENCY, + statement_date=EXPECTED_STATEMENT_DATE, + statement_period_start=EXPECTED_PERIOD_START, + statement_period_end=EXPECTED_PERIOD_END, + opening_balance=EXPECTED_OPENING, + closing_balance=EXPECTED_CLOSING, + ), + segment_citations=[ + SegmentCitation( + field_path="result.bank_name", + value_segment_ids=["p1_l0"], + context_segment_ids=[], + ), + SegmentCitation( + field_path="result.account_iban", + value_segment_ids=["p1_l1"], + context_segment_ids=[], + ), + SegmentCitation( + field_path="result.account_type", + value_segment_ids=[], + context_segment_ids=["p1_l0"], + ), + SegmentCitation( + field_path="result.currency", + value_segment_ids=["p1_l3", "p1_l4"], + context_segment_ids=[], + ), + SegmentCitation( + field_path="result.statement_date", + value_segment_ids=["p1_l5"], + context_segment_ids=[], + ), + SegmentCitation( + field_path="result.statement_period_start", + value_segment_ids=["p1_l2"], + context_segment_ids=[], + ), + SegmentCitation( + field_path="result.statement_period_end", + value_segment_ids=["p1_l2"], + context_segment_ids=[], + ), + SegmentCitation( + field_path="result.opening_balance", + value_segment_ids=["p1_l3"], + context_segment_ids=[], + ), + SegmentCitation( + field_path="result.closing_balance", + value_segment_ids=["p1_l4"], + context_segment_ids=[], + ), + ], + ) + + +def _build_pipeline(fetch_config: Any = None) -> Pipeline: + ocr_client = FakeOCRClient(canned=_canned_ocr_result()) + genai_client = FakeGenAIClient( + parsed=_canned_llm_output(), + usage=GenAIUsage(prompt_tokens=200, completion_tokens=400), + model_name="fake-gpt", + ) + setup = SetupStep(fetch_config=fetch_config) if fetch_config else SetupStep() + return Pipeline( + steps=[ + setup, + OCRStep(ocr_client=ocr_client), + GenAIStep(genai_client=genai_client), + ReliabilityStep(), + ResponseHandlerStep(), + ] + ) + + +class TestEndToEnd: + @pytest.fixture + def request_ix(self, tmp_path: Path) -> RequestIX: + # Canonical single-file request pointing to the committed fixture + # via file:// URL. Also includes a matching Paperless-style text + # so text_agreement has real data to compare against. + paperless_text = ( + "DKB statement. IBAN: DE89370400440532013000. Period 01.03.2026 - " + "31.03.2026. Opening balance 1234.56 EUR. Closing balance 1450.22 EUR. " + "Date 31.03.2026." + ) + return RequestIX( + use_case="bank_statement_header", + ix_client_id="mammon-test", + request_id="end-to-end-1", + ix_id="abcd0123ef456789", + context=Context( + files=[FIXTURE_PDF.as_uri()], + texts=[paperless_text], + ), + options=Options( + ocr=OCROptions(use_ocr=True), + provenance=ProvenanceOptions( + include_provenance=True, max_sources_per_field=5 + ), + ), + ) + + async def test_ix_result_populated_from_fake_llm(self, request_ix: RequestIX) -> None: + pipeline = _build_pipeline() + response = await pipeline.start(request_ix) + assert response.error is None + result = response.ix_result.result + assert result["bank_name"] == EXPECTED_BANK_NAME + assert result["account_iban"] == EXPECTED_IBAN + assert result["currency"] == EXPECTED_CURRENCY + # Pydantic v2 dumps Decimals as strings in mode="json". + assert result["closing_balance"] == str(EXPECTED_CLOSING) + + async def test_provenance_verified_for_closing_balance( + self, request_ix: RequestIX + ) -> None: + pipeline = _build_pipeline() + response = await pipeline.start(request_ix) + assert response.provenance is not None + fp = response.provenance.fields["result.closing_balance"] + assert fp.provenance_verified is True + + async def test_text_agreement_true_when_texts_match_value( + self, request_ix: RequestIX + ) -> None: + pipeline = _build_pipeline() + response = await pipeline.start(request_ix) + assert response.provenance is not None + fp = response.provenance.fields["result.closing_balance"] + assert fp.text_agreement is True + + async def test_timings_per_step(self, request_ix: RequestIX) -> None: + pipeline = _build_pipeline() + response = await pipeline.start(request_ix) + # Each of the five steps executed and recorded a timing. + names = [t["step"] for t in response.metadata.timings] + assert names == [ + "SetupStep", + "OCRStep", + "GenAIStep", + "ReliabilityStep", + "ResponseHandlerStep", + ] + for entry in response.metadata.timings: + assert isinstance(entry["elapsed_seconds"], float) + + async def test_no_error_and_context_stripped(self, request_ix: RequestIX) -> None: + pipeline = _build_pipeline() + response = await pipeline.start(request_ix) + assert response.error is None + dump = response.model_dump() + assert "context" not in dump