Wires the five pipeline steps together with FakeOCRClient + FakeGenAIClient, feeds the committed synthetic_giro.pdf fixture via file:// URL, and asserts the full response shape. - scripts/create_fixture_pdf.py: PyMuPDF-based builder. One-page A4 PDF with six known header strings (bank name, IBAN, period, balances, statement date). Re-runnable on demand; the committed PDF is what CI consumes. - tests/fixtures/synthetic_giro.pdf: committed output. - tests/unit/test_pipeline_end_to_end.py: 5 tests covering * ix_result.result fields populated from the fake LLM * provenance.fields["result.closing_balance"].provenance_verified True * text_agreement True when Paperless-style texts match the value * metadata.timings has one entry per step in the right order * response.error is None and context is not serialised 197 tests total; ruff clean. No integration tests, no real clients, no network. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
272 lines
9.4 KiB
Python
272 lines
9.4 KiB
Python
"""End-to-end pipeline test with the fake OCR + GenAI clients (spec sections 6-9).
|
|
|
|
Feeds the committed ``tests/fixtures/synthetic_giro.pdf`` through the
|
|
full five-step pipeline with canned OCR + canned LLM responses.
|
|
Hermetic: no Surya, no Ollama, no network.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import date
|
|
from decimal import Decimal
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import pytest
|
|
from pydantic import BaseModel
|
|
|
|
from ix.contracts import (
|
|
Context,
|
|
Line,
|
|
OCRDetails,
|
|
OCROptions,
|
|
OCRResult,
|
|
Options,
|
|
Page,
|
|
ProvenanceOptions,
|
|
RequestIX,
|
|
SegmentCitation,
|
|
)
|
|
from ix.genai import FakeGenAIClient, GenAIUsage
|
|
from ix.ocr import FakeOCRClient
|
|
from ix.pipeline import Pipeline
|
|
from ix.pipeline.genai_step import GenAIStep
|
|
from ix.pipeline.ocr_step import OCRStep
|
|
from ix.pipeline.reliability_step import ReliabilityStep
|
|
from ix.pipeline.response_handler_step import ResponseHandlerStep
|
|
from ix.pipeline.setup_step import SetupStep
|
|
from ix.use_cases.bank_statement_header import BankStatementHeader
|
|
|
|
# Committed synthetic statement PDF: two directories up from this file, then
# into ``fixtures/``.
FIXTURE_PDF = (
    Path(__file__).resolve().parent.parent.joinpath("fixtures", "synthetic_giro.pdf")
)


# Ground truth shared by the canned OCR lines and the canned LLM answer below.
# These values must stay in sync with the strings the fixture builder writes
# onto the PDF page.
EXPECTED_BANK_NAME = "DKB"
EXPECTED_IBAN = "DE89370400440532013000"
EXPECTED_OPENING = Decimal("1234.56")
EXPECTED_CLOSING = Decimal("1450.22")
EXPECTED_CURRENCY = "EUR"
EXPECTED_STATEMENT_DATE = date(2026, 3, 31)
EXPECTED_PERIOD_START = date(2026, 3, 1)
EXPECTED_PERIOD_END = date(2026, 3, 31)
|
|
|
|
|
|
def _canned_ocr_result() -> OCRResult:
    """Build the canned, Surya-shaped OCR result for the synthetic_giro fixture.

    The six line texts are the strings create_fixture_pdf.py places on the
    page. The boxes are plausible rather than exact: they mirror the
    builder's 72 pt left margin and 24 pt line pitch on a 595x842 page so
    that normalisation yields sensible 0-1 values.

    Returns:
        An ``OCRResult`` with a single page (``page_no=1``) holding one
        ``Line`` per header string, the newline-joined full text, and
        ``meta_data`` of ``{"engine": "fake"}``.
    """
    # (line text, top y coordinate) for each header row, top to bottom.
    header_rows = [
        ("DKB", 60.0),
        ("IBAN: DE89370400440532013000", 84.0),
        ("Statement period: 01.03.2026 - 31.03.2026", 108.0),
        ("Opening balance: 1234.56 EUR", 132.0),
        ("Closing balance: 1450.22 EUR", 156.0),
        ("Statement date: 31.03.2026", 180.0),
    ]
    x_left, x_right, box_height = 72.0, 500.0, 16.0

    # Each box is eight numbers: the four corners as x/y pairs, starting at
    # the top-left and going via top-right and bottom-right to bottom-left.
    ocr_lines: list[Line] = [
        Line(
            text=row_text,
            bounding_box=[
                x_left,
                top,
                x_right,
                top,
                x_right,
                top + box_height,
                x_left,
                top + box_height,
            ],
        )
        for row_text, top in header_rows
    ]

    page = Page(page_no=1, width=595.0, height=842.0, lines=ocr_lines)
    details = OCRDetails(
        text="\n".join(row_text for row_text, _ in header_rows),
        pages=[page],
    )
    return OCRResult(result=details, meta_data={"engine": "fake"})
|
|
|
|
|
|
class _WrappedResponse(BaseModel):
    """Mirrors the runtime ProvenanceWrappedResponse GenAIStep creates.

    Used as the ``parsed`` payload handed to the fake GenAI client, so the
    test can supply both the extracted header and its segment citations.
    """

    # Extracted header for the bank_statement_header use case.
    result: BankStatementHeader
    # One citation per cited field path; empty when nothing is cited.
    segment_citations: list[SegmentCitation] = []
|
|
|
|
|
|
def _canned_llm_output() -> _WrappedResponse:
    """Build the canned LLM answer: the expected header plus its citations.

    Segment ids follow the OCR line order. After OCRStep injects <page> tag
    lines, the real OCR line at local index 0 gets segment id p1_l0 (tag
    lines are skipped by SegmentIndex.build), which gives:

        p1_l0 -> "DKB"
        p1_l1 -> "IBAN: DE89370400440532013000"
        p1_l2 -> "Statement period: 01.03.2026 - 31.03.2026"
        p1_l3 -> "Opening balance: 1234.56 EUR"
        p1_l4 -> "Closing balance: 1450.22 EUR"
        p1_l5 -> "Statement date: 31.03.2026"

    Returns:
        A ``_WrappedResponse`` whose ``result`` is filled from the
        ``EXPECTED_*`` constants and whose ``segment_citations`` hold one
        entry per header field.
    """
    header = BankStatementHeader(
        bank_name=EXPECTED_BANK_NAME,
        account_iban=EXPECTED_IBAN,
        account_type="checking",
        currency=EXPECTED_CURRENCY,
        statement_date=EXPECTED_STATEMENT_DATE,
        statement_period_start=EXPECTED_PERIOD_START,
        statement_period_end=EXPECTED_PERIOD_END,
        opening_balance=EXPECTED_OPENING,
        closing_balance=EXPECTED_CLOSING,
    )

    # (field path, value segment ids, context segment ids)
    citation_table = [
        ("result.bank_name", ["p1_l0"], []),
        ("result.account_iban", ["p1_l1"], []),
        # account_type has no value segment; it is cited through context only.
        ("result.account_type", [], ["p1_l0"]),
        ("result.currency", ["p1_l3", "p1_l4"], []),
        ("result.statement_date", ["p1_l5"], []),
        ("result.statement_period_start", ["p1_l2"], []),
        ("result.statement_period_end", ["p1_l2"], []),
        ("result.opening_balance", ["p1_l3"], []),
        ("result.closing_balance", ["p1_l4"], []),
    ]
    citations = [
        SegmentCitation(
            field_path=path,
            value_segment_ids=value_ids,
            context_segment_ids=context_ids,
        )
        for path, value_ids, context_ids in citation_table
    ]

    return _WrappedResponse(result=header, segment_citations=citations)
|
|
|
|
|
|
def _build_pipeline(fetch_config: Any = None) -> Pipeline:
    """Assemble the five-step pipeline around the fake OCR and GenAI clients.

    Args:
        fetch_config: Optional value forwarded to ``SetupStep`` as its
            ``fetch_config`` keyword. When it is ``None`` (the default),
            ``SetupStep`` is constructed without arguments.

    Returns:
        A ``Pipeline`` whose steps run in this order: setup, OCR, GenAI,
        reliability, response handler.
    """
    ocr_client = FakeOCRClient(canned=_canned_ocr_result())
    genai_client = FakeGenAIClient(
        parsed=_canned_llm_output(),
        usage=GenAIUsage(prompt_tokens=200, completion_tokens=400),
        model_name="fake-gpt",
    )

    # Compare against None explicitly: a truthiness test would silently drop
    # a caller-supplied config that happens to evaluate as falsy.
    if fetch_config is not None:
        setup = SetupStep(fetch_config=fetch_config)
    else:
        setup = SetupStep()

    return Pipeline(
        steps=[
            setup,
            OCRStep(ocr_client=ocr_client),
            GenAIStep(genai_client=genai_client),
            ReliabilityStep(),
            ResponseHandlerStep(),
        ]
    )
|
|
|
|
|
|
class TestEndToEnd:
    """Drive the fake-client pipeline end to end and check the response."""

    @pytest.fixture
    def request_ix(self, tmp_path: Path) -> RequestIX:
        """Return the canonical single-file request used by every test.

        The request points at the committed fixture PDF through a file://
        URL and carries a matching Paperless-style text so that
        text_agreement has real data to compare against.
        """
        paperless_text = (
            "DKB statement. IBAN: DE89370400440532013000. Period 01.03.2026 - "
            "31.03.2026. Opening balance 1234.56 EUR. Closing balance 1450.22 EUR. "
            "Date 31.03.2026."
        )
        context = Context(
            files=[FIXTURE_PDF.as_uri()],
            texts=[paperless_text],
        )
        options = Options(
            ocr=OCROptions(use_ocr=True),
            provenance=ProvenanceOptions(
                include_provenance=True, max_sources_per_field=5
            ),
        )
        return RequestIX(
            use_case="bank_statement_header",
            ix_client_id="mammon-test",
            request_id="end-to-end-1",
            ix_id="abcd0123ef456789",
            context=context,
            options=options,
        )

    async def test_ix_result_populated_from_fake_llm(self, request_ix: RequestIX) -> None:
        """The result dict carries the values supplied by the fake LLM."""
        response = await _build_pipeline().start(request_ix)
        assert response.error is None

        extracted = response.ix_result.result
        expected_values = {
            "bank_name": EXPECTED_BANK_NAME,
            "account_iban": EXPECTED_IBAN,
            "currency": EXPECTED_CURRENCY,
            # Pydantic v2 dumps Decimals as strings in mode="json".
            "closing_balance": str(EXPECTED_CLOSING),
        }
        for key, wanted in expected_values.items():
            assert extracted[key] == wanted

    async def test_provenance_verified_for_closing_balance(
        self, request_ix: RequestIX
    ) -> None:
        """The closing balance's provenance entry is marked as verified."""
        response = await _build_pipeline().start(request_ix)
        assert response.provenance is not None

        closing = response.provenance.fields["result.closing_balance"]
        assert closing.provenance_verified is True

    async def test_text_agreement_true_when_texts_match_value(
        self, request_ix: RequestIX
    ) -> None:
        """The closing balance agrees with the Paperless-style text."""
        response = await _build_pipeline().start(request_ix)
        assert response.provenance is not None

        closing = response.provenance.fields["result.closing_balance"]
        assert closing.text_agreement is True

    async def test_timings_per_step(self, request_ix: RequestIX) -> None:
        """Each of the five steps records one timing, in execution order."""
        response = await _build_pipeline().start(request_ix)
        timings = response.metadata.timings

        assert [entry["step"] for entry in timings] == [
            "SetupStep",
            "OCRStep",
            "GenAIStep",
            "ReliabilityStep",
            "ResponseHandlerStep",
        ]
        assert all(isinstance(entry["elapsed_seconds"], float) for entry in timings)

    async def test_no_error_and_context_stripped(self, request_ix: RequestIX) -> None:
        """A successful response has no error and no serialised context."""
        response = await _build_pipeline().start(request_ix)

        assert response.error is None
        assert "context" not in response.model_dump()
|