"""End-to-end pipeline test with the fake OCR + GenAI clients (spec sections 6-9). Feeds the committed ``tests/fixtures/synthetic_giro.pdf`` through the full five-step pipeline with canned OCR + canned LLM responses. Hermetic: no Surya, no Ollama, no network. """ from __future__ import annotations from datetime import date from decimal import Decimal from pathlib import Path from typing import Any import pytest from pydantic import BaseModel from ix.contracts import ( Context, Line, OCRDetails, OCROptions, OCRResult, Options, Page, ProvenanceOptions, RequestIX, SegmentCitation, ) from ix.genai import FakeGenAIClient, GenAIUsage from ix.ocr import FakeOCRClient from ix.pipeline import Pipeline from ix.pipeline.genai_step import GenAIStep from ix.pipeline.ocr_step import OCRStep from ix.pipeline.reliability_step import ReliabilityStep from ix.pipeline.response_handler_step import ResponseHandlerStep from ix.pipeline.setup_step import SetupStep from ix.use_cases.bank_statement_header import BankStatementHeader FIXTURE_PDF = Path(__file__).resolve().parent.parent / "fixtures" / "synthetic_giro.pdf" # Ground-truth values. Must match the strings the fixture builder drops on # the page AND the canned OCR output below. EXPECTED_BANK_NAME = "DKB" EXPECTED_IBAN = "DE89370400440532013000" EXPECTED_OPENING = Decimal("1234.56") EXPECTED_CLOSING = Decimal("1450.22") EXPECTED_CURRENCY = "EUR" EXPECTED_STATEMENT_DATE = date(2026, 3, 31) EXPECTED_PERIOD_START = date(2026, 3, 1) EXPECTED_PERIOD_END = date(2026, 3, 31) def _canned_ocr_result() -> OCRResult: """Canned Surya-shaped result for the synthetic_giro fixture. Line texts match the strings placed by create_fixture_pdf.py. Bboxes are plausible-but-not-exact: the fixture builder uses 72 pt left margin and 24 pt line height on a 595x842 page, so we mirror those coords here so normalisation gives sensible 0-1 values. """ width, height = 595.0, 842.0 lines_meta = [ ("DKB", 60.0), ("IBAN: DE89370400440532013000", 84.0), ("Statement period: 01.03.2026 - 31.03.2026", 108.0), ("Opening balance: 1234.56 EUR", 132.0), ("Closing balance: 1450.22 EUR", 156.0), ("Statement date: 31.03.2026", 180.0), ] lines: list[Line] = [] for text, y_top in lines_meta: y_bot = y_top + 16.0 lines.append( Line( text=text, bounding_box=[72.0, y_top, 500.0, y_top, 500.0, y_bot, 72.0, y_bot], ) ) return OCRResult( result=OCRDetails( text="\n".join(t for t, _ in lines_meta), pages=[ Page( page_no=1, width=width, height=height, lines=lines, ) ], ), meta_data={"engine": "fake"}, ) class _WrappedResponse(BaseModel): """Mirrors the runtime ProvenanceWrappedResponse GenAIStep creates.""" result: BankStatementHeader segment_citations: list[SegmentCitation] = [] def _canned_llm_output() -> _WrappedResponse: # After OCRStep injects tag lines, the real OCR line at local # index 0 gets segment id p1_l0 (tag lines are skipped by # SegmentIndex.build). So: # p1_l0 -> "DKB" # p1_l1 -> "IBAN: DE89370400440532013000" # p1_l2 -> "Statement period: 01.03.2026 - 31.03.2026" # p1_l3 -> "Opening balance: 1234.56 EUR" # p1_l4 -> "Closing balance: 1450.22 EUR" # p1_l5 -> "Statement date: 31.03.2026" return _WrappedResponse( result=BankStatementHeader( bank_name=EXPECTED_BANK_NAME, account_iban=EXPECTED_IBAN, account_type="checking", currency=EXPECTED_CURRENCY, statement_date=EXPECTED_STATEMENT_DATE, statement_period_start=EXPECTED_PERIOD_START, statement_period_end=EXPECTED_PERIOD_END, opening_balance=EXPECTED_OPENING, closing_balance=EXPECTED_CLOSING, ), segment_citations=[ SegmentCitation( field_path="result.bank_name", value_segment_ids=["p1_l0"], context_segment_ids=[], ), SegmentCitation( field_path="result.account_iban", value_segment_ids=["p1_l1"], context_segment_ids=[], ), SegmentCitation( field_path="result.account_type", value_segment_ids=[], context_segment_ids=["p1_l0"], ), SegmentCitation( field_path="result.currency", value_segment_ids=["p1_l3", "p1_l4"], context_segment_ids=[], ), SegmentCitation( field_path="result.statement_date", value_segment_ids=["p1_l5"], context_segment_ids=[], ), SegmentCitation( field_path="result.statement_period_start", value_segment_ids=["p1_l2"], context_segment_ids=[], ), SegmentCitation( field_path="result.statement_period_end", value_segment_ids=["p1_l2"], context_segment_ids=[], ), SegmentCitation( field_path="result.opening_balance", value_segment_ids=["p1_l3"], context_segment_ids=[], ), SegmentCitation( field_path="result.closing_balance", value_segment_ids=["p1_l4"], context_segment_ids=[], ), ], ) def _build_pipeline(fetch_config: Any = None) -> Pipeline: ocr_client = FakeOCRClient(canned=_canned_ocr_result()) genai_client = FakeGenAIClient( parsed=_canned_llm_output(), usage=GenAIUsage(prompt_tokens=200, completion_tokens=400), model_name="fake-gpt", ) setup = SetupStep(fetch_config=fetch_config) if fetch_config else SetupStep() return Pipeline( steps=[ setup, OCRStep(ocr_client=ocr_client), GenAIStep(genai_client=genai_client), ReliabilityStep(), ResponseHandlerStep(), ] ) class TestEndToEnd: @pytest.fixture def request_ix(self, tmp_path: Path) -> RequestIX: # Canonical single-file request pointing to the committed fixture # via file:// URL. Also includes a matching Paperless-style text # so text_agreement has real data to compare against. paperless_text = ( "DKB statement. IBAN: DE89370400440532013000. Period 01.03.2026 - " "31.03.2026. Opening balance 1234.56 EUR. Closing balance 1450.22 EUR. " "Date 31.03.2026." ) return RequestIX( use_case="bank_statement_header", ix_client_id="mammon-test", request_id="end-to-end-1", ix_id="abcd0123ef456789", context=Context( files=[FIXTURE_PDF.as_uri()], texts=[paperless_text], ), options=Options( ocr=OCROptions(use_ocr=True), provenance=ProvenanceOptions( include_provenance=True, max_sources_per_field=5 ), ), ) async def test_ix_result_populated_from_fake_llm(self, request_ix: RequestIX) -> None: pipeline = _build_pipeline() response = await pipeline.start(request_ix) assert response.error is None result = response.ix_result.result assert result["bank_name"] == EXPECTED_BANK_NAME assert result["account_iban"] == EXPECTED_IBAN assert result["currency"] == EXPECTED_CURRENCY # Pydantic v2 dumps Decimals as strings in mode="json". assert result["closing_balance"] == str(EXPECTED_CLOSING) async def test_provenance_verified_for_closing_balance( self, request_ix: RequestIX ) -> None: pipeline = _build_pipeline() response = await pipeline.start(request_ix) assert response.provenance is not None fp = response.provenance.fields["result.closing_balance"] assert fp.provenance_verified is True async def test_text_agreement_true_when_texts_match_value( self, request_ix: RequestIX ) -> None: pipeline = _build_pipeline() response = await pipeline.start(request_ix) assert response.provenance is not None fp = response.provenance.fields["result.closing_balance"] assert fp.text_agreement is True async def test_timings_per_step(self, request_ix: RequestIX) -> None: pipeline = _build_pipeline() response = await pipeline.start(request_ix) # Each of the five steps executed and recorded a timing. names = [t["step"] for t in response.metadata.timings] assert names == [ "SetupStep", "OCRStep", "GenAIStep", "ReliabilityStep", "ResponseHandlerStep", ] for entry in response.metadata.timings: assert isinstance(entry["elapsed_seconds"], float) async def test_no_error_and_context_stripped(self, request_ix: RequestIX) -> None: pipeline = _build_pipeline() response = await pipeline.start(request_ix) assert response.error is None dump = response.model_dump() assert "context" not in dump