From b109bba873c550a7c7adb54beeec583b1f8c0249 Mon Sep 17 00:00:00 2001
From: Dirk Riemann <ditori@gmail.com>
Date: Sat, 18 Apr 2026 11:24:29 +0200
Subject: [PATCH] test(pipeline): end-to-end hermetic test with fakes +
 synthetic fixture

Wires the five pipeline steps together with FakeOCRClient +
FakeGenAIClient, feeds the committed synthetic_giro.pdf fixture via
file:// URL, and asserts the full response shape.

- scripts/create_fixture_pdf.py: PyMuPDF-based builder. One-page A4 PDF
  with six known header strings (bank name, IBAN, period, balances,
  statement date). Re-runnable on demand; the committed PDF is what CI
  consumes.
- tests/fixtures/synthetic_giro.pdf: committed output.
- tests/unit/test_pipeline_end_to_end.py: 5 tests covering
  * ix_result.result fields populated from the fake LLM
  * provenance.fields["result.closing_balance"].provenance_verified True
  * text_agreement True when Paperless-style texts match the value
  * metadata.timings has one entry per step in the right order
  * response.error is None and context is not serialised

197 tests total; ruff clean. No integration tests, no real clients,
no network.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 scripts/create_fixture_pdf.py          |  66 ++++++
 tests/fixtures/synthetic_giro.pdf      |  98 +++++++++
 tests/unit/test_pipeline_end_to_end.py | 272 +++++++++++++++++++++++++
 3 files changed, 436 insertions(+)
 create mode 100644 scripts/create_fixture_pdf.py
 create mode 100644 tests/fixtures/synthetic_giro.pdf
 create mode 100644 tests/unit/test_pipeline_end_to_end.py
diff --git a/scripts/create_fixture_pdf.py b/scripts/create_fixture_pdf.py
new file mode 100644
index 0000000..5f1ab65
--- /dev/null
+++ b/scripts/create_fixture_pdf.py
@@ -0,0 +1,66 @@
+"""Build the synthetic E2E fixture PDF at ``tests/fixtures/synthetic_giro.pdf``.
+
+Re-runnable on demand. Output bytes are stable across runs in page
+content, layout, and text — only the PDF's embedded timestamps change,
+which pipeline tests don't read. The committed fixture is what CI
+consumes; re-run this script locally if you change the ground truth.
+
+Contents: one A4 portrait page with six known strings placed at fixed
+positions near the top. The goal is reproducible ground truth, not a
+realistic bank statement. The pipeline's fake OCR client is seeded with
+those same strings (at plausible bboxes) so the E2E test can assert
+exact matches.
+
+Usage::
+
+    uv run python scripts/create_fixture_pdf.py
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+import fitz  # PyMuPDF
+
+OUT_PATH = (
+    Path(__file__).resolve().parent.parent / "tests" / "fixtures" / "synthetic_giro.pdf"
+)
+
+LINES: list[str] = [  # ground truth; the E2E test's canned OCR lines mirror these
+    "DKB",
+    "IBAN: DE89370400440532013000",
+    "Statement period: 01.03.2026 - 31.03.2026",
+    "Opening balance: 1234.56 EUR",
+    "Closing balance: 1450.22 EUR",
+    "Statement date: 31.03.2026",
+]
+
+
+def build() -> None:
+    """Render ``LINES`` onto one A4 page and save the PDF to ``OUT_PATH``."""
+    # Context manager closes the document even if drawing or saving raises.
+    with fitz.open() as doc:
+        # A4 @ 72 dpi -> 595 x 842 points.
+        page = doc.new_page(width=595, height=842)
+        for index, line in enumerate(LINES):
+            # Text starts at y=72 pt and advances 24 pt per line.
+            page.insert_text(
+                (72.0, 72.0 + 24.0 * index),
+                line,
+                fontsize=12,
+                fontname="helv",
+            )
+        OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
+        # deflate=False + garbage=0 keeps the output byte-stable.
+        doc.save(
+            str(OUT_PATH),
+            deflate=False,
+            deflate_images=False,
+            garbage=0,
+            clean=False,
+        )
+
+
+if __name__ == "__main__":
+    build()
+    print(f"wrote {OUT_PATH}")
diff --git a/tests/fixtures/synthetic_giro.pdf b/tests/fixtures/synthetic_giro.pdf
new file mode 100644
index 0000000..05e5b20
--- /dev/null
+++ b/tests/fixtures/synthetic_giro.pdf
@@ -0,0 +1,98 @@
+%PDF-1.7
+%ÂµÂ¶
+% Written by MuPDF 1.27.2
+
+1 0 obj
+<</Type/Catalog/Pages 2 0 R/Info<</Producer(MuPDF 1.27.2)>>>>
+endobj
+
+2 0 obj
+<</Type/Pages/Count 1/Kids[4 0 R]>>
+endobj
+
+3 0 obj
+<</Font<</helv 5 0 R>>>>
+endobj
+
+4 0 obj
+<</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources 3 0 R/Parent 2 0 R/Contents[6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R]>>
+endobj
+
+5 0 obj
+<</Type/Font/Subtype/Type1/BaseFont/Helvetica/Encoding/WinAnsiEncoding>>
+endobj
+
+6 0 obj
+<</Length 54>>
+stream
+
+q
+BT
+1 0 0 1 72 770 Tm
+/helv 12 Tf [<444b42>]TJ
+ET
+Q
+
+endstream
+endobj
+
+7 0 obj
+<</Length 95/Filter/FlateDecode>>
+stream
+xÚˆ1
+€@û¼"?ð’¬ž‚X6vB:±°P,´°ñýæXf†^„SL8+g4ìU×q,Ê~òÚ£ƒBpØ®	@m­uf–-‚òÅu4K¸Ô4l>Óä´Ð•9
+endstream
+endobj
+
+8 0 obj
+<</Length 105/Filter/FlateDecode>>
+stream
+xÚe‰±
+ACûùŠùg2»3b!ØØ	ÛÉ·‡…6~¿é%ÉK ò‘ËW£\4t¼åðœ¯¯:÷®SjéLÏ™Õ`eÙyÌ=[¬°°pL2H°	ÃÆ'þŸó2nrr—S¦Ò
+endstream
+endobj
+
+9 0 obj
+<</Length 100/Filter/FlateDecode>>
+stream
+xÚÃ±
+Â@EÑ~¾bþÀ™7»oˆ…`c'LR„°Á")lü~÷^Ž|å‘âjc×åtÕ<åòéÇOš»Î·²7ceç44Aç6tk¬°ð@Dô¨AX©#Ü—|É3å-Åyd
+endstream
+endobj
+
+10 0 obj
+<</Length 99/Filter/FlateDecode>>
+stream
+xÚˆ1
+B1û=ÅÞÀÝ÷’±lì„íÄB$-l<¿™©fìk§²ôX¦¸FóúØî5ß?Oxm~;4ê©mP{M„ \'WQ“IˆÖ8Þëbç²«ýÂ·V
+endstream
+endobj
+
+11 0 obj
+<</Length 93/Filter/FlateDecode>>
+stream
+xÚ-ˆ;
+€@ûœ"70ŸÝl#‚ÍvB:±\±ÐÂÆó›Bó)ÆX-úÝÙ®YÐ\ú¬%Ùö •$dÑMHUYš†ã%,jÃê&‡>NT
+endstream
+endobj
+
+xref
+0 12
+0000000000 65535 f 
+0000000042 00000 n 
+0000000120 00000 n 
+0000000172 00000 n 
+0000000213 00000 n 
+0000000352 00000 n 
+0000000441 00000 n 
+0000000544 00000 n 
+0000000707 00000 n 
+0000000881 00000 n 
+0000001050 00000 n 
+0000001218 00000 n 
+
+trailer
+<</Size 12/Root 1 0 R/ID[<C3B4C38E004FC2B6C3A0C2BF4C00C282><890F3E53B827FF9B00CB90D2895721FC>]>>
+startxref
+1380
+%%EOF
diff --git a/tests/unit/test_pipeline_end_to_end.py b/tests/unit/test_pipeline_end_to_end.py
new file mode 100644
index 0000000..44834b2
--- /dev/null
+++ b/tests/unit/test_pipeline_end_to_end.py
@@ -0,0 +1,272 @@
+"""End-to-end pipeline test with the fake OCR + GenAI clients (spec sections 6-9).
+
+Feeds the committed ``tests/fixtures/synthetic_giro.pdf`` through the
+full five-step pipeline with canned OCR + canned LLM responses.
+Hermetic: no Surya, no Ollama, no network.
+"""
+
+from __future__ import annotations
+
+from datetime import date
+from decimal import Decimal
+from pathlib import Path
+from typing import Any
+
+import pytest
+from pydantic import BaseModel
+
+from ix.contracts import (
+    Context,
+    Line,
+    OCRDetails,
+    OCROptions,
+    OCRResult,
+    Options,
+    Page,
+    ProvenanceOptions,
+    RequestIX,
+    SegmentCitation,
+)
+from ix.genai import FakeGenAIClient, GenAIUsage
+from ix.ocr import FakeOCRClient
+from ix.pipeline import Pipeline
+from ix.pipeline.genai_step import GenAIStep
+from ix.pipeline.ocr_step import OCRStep
+from ix.pipeline.reliability_step import ReliabilityStep
+from ix.pipeline.response_handler_step import ResponseHandlerStep
+from ix.pipeline.setup_step import SetupStep
+from ix.use_cases.bank_statement_header import BankStatementHeader
+
+FIXTURE_PDF = Path(__file__).resolve().parent.parent / "fixtures" / "synthetic_giro.pdf"
+
+
+# Ground-truth values. Must match the strings the fixture builder drops on
+# the page AND the canned OCR output below.
+EXPECTED_BANK_NAME = "DKB"
+EXPECTED_IBAN = "DE89370400440532013000"
+EXPECTED_OPENING = Decimal("1234.56")
+EXPECTED_CLOSING = Decimal("1450.22")
+EXPECTED_CURRENCY = "EUR"
+EXPECTED_STATEMENT_DATE = date(2026, 3, 31)
+EXPECTED_PERIOD_START = date(2026, 3, 1)
+EXPECTED_PERIOD_END = date(2026, 3, 31)
+
+
+def _canned_ocr_result() -> OCRResult:
+    """Return the canned Surya-shaped OCR result for the synthetic_giro fixture.
+
+    Every line text equals a string written by create_fixture_pdf.py. The
+    boxes are approximate rather than measured: they reuse the builder's
+    72 pt left margin and 24 pt line pitch on the 595x842 page, so
+    normalisation gives sensible 0-1 values.
+    """
+    page_width, page_height = 595.0, 842.0
+    rows = [
+        ("DKB", 60.0),
+        ("IBAN: DE89370400440532013000", 84.0),
+        ("Statement period: 01.03.2026 - 31.03.2026", 108.0),
+        ("Opening balance: 1234.56 EUR", 132.0),
+        ("Closing balance: 1450.22 EUR", 156.0),
+        ("Statement date: 31.03.2026", 180.0),
+    ]
+
+    def _to_line(text: str, top: float) -> Line:
+        bottom = top + 16.0
+        return Line(
+            text=text,
+            bounding_box=[72.0, top, 500.0, top, 500.0, bottom, 72.0, bottom],
+        )
+
+    ocr_lines = [_to_line(text, top) for text, top in rows]
+    page = Page(
+        page_no=1,
+        width=page_width,
+        height=page_height,
+        lines=ocr_lines,
+    )
+    details = OCRDetails(
+        text="\n".join(text for text, _ in rows),
+        pages=[page],
+    )
+    return OCRResult(
+        result=details,
+        meta_data={"engine": "fake"},
+    )
+
+
+class _WrappedResponse(BaseModel):
+    """Mirrors the runtime ProvenanceWrappedResponse GenAIStep creates."""
+
+    result: BankStatementHeader  # extracted header fields
+    segment_citations: list[SegmentCitation] = []  # pydantic copies mutable defaults
+
+
+def _canned_llm_output() -> _WrappedResponse:
+    # After OCRStep injects <page> tag lines, the real OCR line at local
+    # index 0 gets segment id p1_l0 (tag lines are skipped by
+    # SegmentIndex.build). So:
+    #   p1_l0 -> "DKB"
+    #   p1_l1 -> "IBAN: DE89370400440532013000"
+    #   p1_l2 -> "Statement period: 01.03.2026 - 31.03.2026"
+    #   p1_l3 -> "Opening balance: 1234.56 EUR"
+    #   p1_l4 -> "Closing balance: 1450.22 EUR"
+    #   p1_l5 -> "Statement date: 31.03.2026"
+    return _WrappedResponse(
+        result=BankStatementHeader(
+            bank_name=EXPECTED_BANK_NAME,
+            account_iban=EXPECTED_IBAN,
+            account_type="checking",
+            currency=EXPECTED_CURRENCY,
+            statement_date=EXPECTED_STATEMENT_DATE,
+            statement_period_start=EXPECTED_PERIOD_START,
+            statement_period_end=EXPECTED_PERIOD_END,
+            opening_balance=EXPECTED_OPENING,
+            closing_balance=EXPECTED_CLOSING,
+        ),
+        segment_citations=[
+            SegmentCitation(
+                field_path="result.bank_name",
+                value_segment_ids=["p1_l0"],
+                context_segment_ids=[],
+            ),
+            SegmentCitation(
+                field_path="result.account_iban",
+                value_segment_ids=["p1_l1"],
+                context_segment_ids=[],
+            ),
+            SegmentCitation(
+                field_path="result.account_type",
+                value_segment_ids=[],  # "checking" is not printed on the page
+                context_segment_ids=["p1_l0"],
+            ),
+            SegmentCitation(
+                field_path="result.currency",
+                value_segment_ids=["p1_l3", "p1_l4"],  # "EUR" is on both balance lines
+                context_segment_ids=[],
+            ),
+            SegmentCitation(
+                field_path="result.statement_date",
+                value_segment_ids=["p1_l5"],
+                context_segment_ids=[],
+            ),
+            SegmentCitation(
+                field_path="result.statement_period_start",
+                value_segment_ids=["p1_l2"],  # period start and end share one line
+                context_segment_ids=[],
+            ),
+            SegmentCitation(
+                field_path="result.statement_period_end",
+                value_segment_ids=["p1_l2"],
+                context_segment_ids=[],
+            ),
+            SegmentCitation(
+                field_path="result.opening_balance",
+                value_segment_ids=["p1_l3"],
+                context_segment_ids=[],
+            ),
+            SegmentCitation(
+                field_path="result.closing_balance",
+                value_segment_ids=["p1_l4"],
+                context_segment_ids=[],
+            ),
+        ],
+    )
+
+
+def _build_pipeline(fetch_config: Any = None) -> Pipeline:
+    """Five-step pipeline on fakes; only fetch_config=None keeps SetupStep defaults."""
+    genai_client = FakeGenAIClient(
+        parsed=_canned_llm_output(),
+        usage=GenAIUsage(prompt_tokens=200, completion_tokens=400),
+        model_name="fake-gpt",
+    )
+    setup_kwargs = {} if fetch_config is None else {"fetch_config": fetch_config}
+    return Pipeline(
+        steps=[
+            SetupStep(**setup_kwargs),
+            OCRStep(ocr_client=FakeOCRClient(canned=_canned_ocr_result())),
+            GenAIStep(genai_client=genai_client),
+            ReliabilityStep(),
+            ResponseHandlerStep(),
+        ]
+    )
+
+
+class TestEndToEnd:
+    """Drive the fake-backed pipeline over the committed fixture PDF."""
+    @pytest.fixture
+    def request_ix(self) -> RequestIX:
+        # Canonical single-file request pointing to the committed fixture
+        # via file:// URL. Also includes a matching Paperless-style text
+        # so text_agreement has real data to compare against.
+        paperless_text = (
+            "DKB statement. IBAN: DE89370400440532013000. Period 01.03.2026 - "
+            "31.03.2026. Opening balance 1234.56 EUR. Closing balance 1450.22 EUR. "
+            "Date 31.03.2026."
+        )
+        return RequestIX(
+            use_case="bank_statement_header",
+            ix_client_id="mammon-test",
+            request_id="end-to-end-1",
+            ix_id="abcd0123ef456789",
+            context=Context(
+                files=[FIXTURE_PDF.as_uri()],
+                texts=[paperless_text],
+            ),
+            options=Options(
+                ocr=OCROptions(use_ocr=True),
+                provenance=ProvenanceOptions(
+                    include_provenance=True, max_sources_per_field=5
+                ),
+            ),
+        )
+
+    async def test_ix_result_populated_from_fake_llm(self, request_ix: RequestIX) -> None:
+        """Result fields carry the values canned in the fake LLM output."""
+        response = await _build_pipeline().start(request_ix)
+        assert response.error is None
+        result = response.ix_result.result
+        assert result["bank_name"] == EXPECTED_BANK_NAME
+        assert result["account_iban"] == EXPECTED_IBAN
+        assert result["currency"] == EXPECTED_CURRENCY
+        # Pydantic v2 dumps Decimals as strings in mode="json".
+        assert result["closing_balance"] == str(EXPECTED_CLOSING)
+
+    async def test_provenance_verified_for_closing_balance(
+        self, request_ix: RequestIX
+    ) -> None:
+        """The closing_balance field is reported as provenance_verified."""
+        response = await _build_pipeline().start(request_ix)
+        assert response.provenance is not None
+        fp = response.provenance.fields["result.closing_balance"]
+        assert fp.provenance_verified is True
+
+    async def test_text_agreement_true_when_texts_match_value(
+        self, request_ix: RequestIX
+    ) -> None:
+        """The closing_balance field agrees with the supplied context text."""
+        response = await _build_pipeline().start(request_ix)
+        assert response.provenance is not None
+        fp = response.provenance.fields["result.closing_balance"]
+        assert fp.text_agreement is True
+
+    async def test_timings_per_step(self, request_ix: RequestIX) -> None:
+        """metadata.timings lists the five steps in execution order."""
+        response = await _build_pipeline().start(request_ix)
+        # Each of the five steps executed and recorded a timing.
+        names = [t["step"] for t in response.metadata.timings]
+        assert names == [
+            "SetupStep",
+            "OCRStep",
+            "GenAIStep",
+            "ReliabilityStep",
+            "ResponseHandlerStep",
+        ]
+        for entry in response.metadata.timings:
+            assert isinstance(entry["elapsed_seconds"], float)
+
+    async def test_no_error_and_context_stripped(self, request_ix: RequestIX) -> None:
+        """No error is set and the dumped response has no ``context`` key."""
+        response = await _build_pipeline().start(request_ix)
+        assert response.error is None
+        assert "context" not in response.model_dump()