Compare commits
2 commits
118d77c428
...
a54a968313
| Author | SHA1 | Date | |
|---|---|---|---|
| a54a968313 | |||
| b109bba873 |
3 changed files with 436 additions and 0 deletions
66
scripts/create_fixture_pdf.py
Normal file
66
scripts/create_fixture_pdf.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
"""Build the synthetic E2E fixture PDF at ``tests/fixtures/synthetic_giro.pdf``.
|
||||
|
||||
Re-runnable on demand. Output bytes are stable across runs in page
|
||||
content, layout, and text — only the PDF's embedded timestamps change,
|
||||
which pipeline tests don't read. The committed fixture is what CI
|
||||
consumes; re-run this script locally if you change the ground truth.
|
||||
|
||||
Contents: one A4 portrait page with six known strings placed at fixed
|
||||
positions near the top. The goal is reproducible ground truth, not a
|
||||
realistic bank statement. The pipeline's fake OCR client is seeded with
|
||||
those same strings (at plausible bboxes) so the E2E test can assert
|
||||
exact matches.
|
||||
|
||||
Usage::
|
||||
|
||||
uv run python scripts/create_fixture_pdf.py
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import fitz # PyMuPDF
|
||||
|
||||
# Where the fixture is written: ``tests/fixtures/synthetic_giro.pdf`` under the
# directory that contains this script's own directory. Resolved from
# ``__file__`` so the current working directory does not matter.
OUT_PATH = Path(__file__).resolve().parent.parent.joinpath(
    "tests", "fixtures", "synthetic_giro.pdf"
)

# Ground-truth strings, one per rendered line, in top-to-bottom order.
LINES: list[str] = [
    "DKB",
    "IBAN: DE89370400440532013000",
    "Statement period: 01.03.2026 - 31.03.2026",
    "Opening balance: 1234.56 EUR",
    "Closing balance: 1450.22 EUR",
    "Statement date: 31.03.2026",
]
|
||||
|
||||
|
||||
def build() -> None:
    """Render ``LINES`` onto one A4 page and save the PDF to ``OUT_PATH``.

    Every entry of ``LINES`` is inserted in 12 pt ``helv`` at x = 72 pt; the
    first entry uses y = 72 pt and each following entry is 24 pt further on.
    The parent directory of ``OUT_PATH`` is created when it does not exist.

    The document is closed even if drawing or saving raises, so a failed run
    does not leak the open document handle.
    """
    doc = fitz.open()
    try:
        # A4 @ 72 dpi -> 595 x 842 points.
        page = doc.new_page(width=595, height=842)
        y = 72.0
        for line in LINES:
            page.insert_text(
                (72.0, y),
                line,
                fontsize=12,
                fontname="helv",
            )
            y += 24.0
        OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
        # Save with compression, garbage collection and cleaning all switched
        # off; the intent is to keep the output byte-stable.
        doc.save(
            str(OUT_PATH),
            deflate=False,
            deflate_images=False,
            garbage=0,
            clean=False,
        )
    finally:
        # Release the document whether or not the save succeeded.
        doc.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: regenerate the fixture, then report where it was
    # written so the caller can see which file changed.
    build()
    print(f"wrote {OUT_PATH}")
|
||||
98
tests/fixtures/synthetic_giro.pdf
vendored
Normal file
98
tests/fixtures/synthetic_giro.pdf
vendored
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
%PDF-1.7
|
||||
%µ¶
|
||||
% Written by MuPDF 1.27.2
|
||||
|
||||
1 0 obj
|
||||
<</Type/Catalog/Pages 2 0 R/Info<</Producer(MuPDF 1.27.2)>>>>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<</Type/Pages/Count 1/Kids[4 0 R]>>
|
||||
endobj
|
||||
|
||||
3 0 obj
|
||||
<</Font<</helv 5 0 R>>>>
|
||||
endobj
|
||||
|
||||
4 0 obj
|
||||
<</Type/Page/MediaBox[0 0 595 842]/Rotate 0/Resources 3 0 R/Parent 2 0 R/Contents[6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R]>>
|
||||
endobj
|
||||
|
||||
5 0 obj
|
||||
<</Type/Font/Subtype/Type1/BaseFont/Helvetica/Encoding/WinAnsiEncoding>>
|
||||
endobj
|
||||
|
||||
6 0 obj
|
||||
<</Length 54>>
|
||||
stream
|
||||
|
||||
q
|
||||
BT
|
||||
1 0 0 1 72 770 Tm
|
||||
/helv 12 Tf [<444b42>]TJ
|
||||
ET
|
||||
Q
|
||||
|
||||
endstream
|
||||
endobj
|
||||
|
||||
7 0 obj
|
||||
<</Length 95/Filter/FlateDecode>>
|
||||
stream
|
||||
xÚˆ1
|
||||
€@û¼"?𒬞‚X6vB:±°P,´°ñýæXf†^<1A>„SL8+g4ìU×q,Ê~òÚ£ƒBpØ® @muf–-‚òÅu4
K¸Ô4l>Óä´Ð•9
|
||||
endstream
|
||||
endobj
|
||||
|
||||
8 0 obj
|
||||
<</Length 105/Filter/FlateDecode>>
|
||||
stream
|
||||
xÚe‰±
|
||||
ACûùŠùg2»3b!ØØ ÛÉ·‡…6~¿é%ÉK ò‘ËW£\4t¼å𜯯:÷®<C3B7>S<EFBFBD>jéLÏ<4C>™Õ`eÙyÌ=[¬°°pL2H° ÃÆ'þŸó2nrr—S¦Ò
|
||||
endstream
|
||||
endobj
|
||||
|
||||
9 0 obj
|
||||
<</Length 100/Filter/FlateDecode>>
|
||||
stream
|
||||
xÚ
ñ
|
||||
Â@EÑ~¾bþÀ™7»o
ˆ…`c'LR„°Á")lü~÷^Ž|å‘âjc×åtÕ<åòéÇOš»Î·²7ceç44Aç6tk¬°ð@Dô¨AX©#Ü—|É3å-Åyd
|
||||
endstream
|
||||
endobj
|
||||
|
||||
10 0 obj
|
||||
<</Length 99/Filter/FlateDecode>>
|
||||
stream
|
||||
xÚ
ˆ1
|
||||
B1û=ÅÞÀÝ÷’±lì„íÄB$-l<¿™©fìk§²ôX¦¸FóúØî5ß?Oxm~;4ê©mP{M
„ \'WQ<57>“<><E2809C><EFBFBD>IˆÖ8Þëb粫ý·V
|
||||
endstream
|
||||
endobj
|
||||
|
||||
11 0 obj
|
||||
<</Length 93/Filter/FlateDecode>>
|
||||
stream
|
||||
xÚ-ˆ;
|
||||
€@ûœ"70ŸÝl#‚ÍvB:±\±ÐÂÆó›Bó)ÆX-ú
ÝÙ®YÐ\ú¬%Ùö •$dÑMHUYš†ã%,jÃê&‡>NT
|
||||
endstream
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 12
|
||||
0000000000 65535 f
|
||||
0000000042 00000 n
|
||||
0000000120 00000 n
|
||||
0000000172 00000 n
|
||||
0000000213 00000 n
|
||||
0000000352 00000 n
|
||||
0000000441 00000 n
|
||||
0000000544 00000 n
|
||||
0000000707 00000 n
|
||||
0000000881 00000 n
|
||||
0000001050 00000 n
|
||||
0000001218 00000 n
|
||||
|
||||
trailer
|
||||
<</Size 12/Root 1 0 R/ID[<C3B4C38E004FC2B6C3A0C2BF4C00C282><890F3E53B827FF9B00CB90D2895721FC>]>>
|
||||
startxref
|
||||
1380
|
||||
%%EOF
|
||||
272
tests/unit/test_pipeline_end_to_end.py
Normal file
272
tests/unit/test_pipeline_end_to_end.py
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
"""End-to-end pipeline test with the fake OCR + GenAI clients (spec sections 6-9).
|
||||
|
||||
Feeds the committed ``tests/fixtures/synthetic_giro.pdf`` through the
|
||||
full five-step pipeline with canned OCR + canned LLM responses.
|
||||
Hermetic: no Surya, no Ollama, no network.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel
|
||||
|
||||
from ix.contracts import (
|
||||
Context,
|
||||
Line,
|
||||
OCRDetails,
|
||||
OCROptions,
|
||||
OCRResult,
|
||||
Options,
|
||||
Page,
|
||||
ProvenanceOptions,
|
||||
RequestIX,
|
||||
SegmentCitation,
|
||||
)
|
||||
from ix.genai import FakeGenAIClient, GenAIUsage
|
||||
from ix.ocr import FakeOCRClient
|
||||
from ix.pipeline import Pipeline
|
||||
from ix.pipeline.genai_step import GenAIStep
|
||||
from ix.pipeline.ocr_step import OCRStep
|
||||
from ix.pipeline.reliability_step import ReliabilityStep
|
||||
from ix.pipeline.response_handler_step import ResponseHandlerStep
|
||||
from ix.pipeline.setup_step import SetupStep
|
||||
from ix.use_cases.bank_statement_header import BankStatementHeader
|
||||
|
||||
# Committed fixture PDF. ``fixtures/`` is a sibling of the directory that
# contains this test module.
FIXTURE_PDF = Path(__file__).resolve().parent.parent.joinpath(
    "fixtures", "synthetic_giro.pdf"
)

# Ground truth asserted by the tests in this module. Each value has to agree
# with the strings the fixture builder puts on the page and with the canned
# OCR output defined in this module.
EXPECTED_BANK_NAME = "DKB"
EXPECTED_IBAN = "DE89370400440532013000"
EXPECTED_CURRENCY = "EUR"
EXPECTED_OPENING = Decimal("1234.56")
EXPECTED_CLOSING = Decimal("1450.22")
EXPECTED_PERIOD_START = date(year=2026, month=3, day=1)
EXPECTED_PERIOD_END = date(year=2026, month=3, day=31)
EXPECTED_STATEMENT_DATE = date(year=2026, month=3, day=31)
|
||||
|
||||
|
||||
def _canned_ocr_result() -> OCRResult:
    """Return the canned OCR result for the synthetic_giro fixture.

    The result holds a single 595 x 842 page with six lines whose texts equal
    the strings create_fixture_pdf.py places on the page. The boxes are
    approximations rather than measurements: every line spans x = 72..500 and
    is 16 units tall, with tops 24 apart starting at y = 60. That mirrors the
    builder's 72 pt left margin and 24 pt line height, so normalisation
    yields sensible 0-1 values.
    """
    page_width, page_height = 595.0, 842.0
    left, right, box_height = 72.0, 500.0, 16.0
    text_and_top = (
        ("DKB", 60.0),
        ("IBAN: DE89370400440532013000", 84.0),
        ("Statement period: 01.03.2026 - 31.03.2026", 108.0),
        ("Opening balance: 1234.56 EUR", 132.0),
        ("Closing balance: 1450.22 EUR", 156.0),
        ("Statement date: 31.03.2026", 180.0),
    )

    def _quad(top: float) -> list[float]:
        # Flat x/y pairs: (left, top), (right, top), (right, bottom), (left, bottom).
        bottom = top + box_height
        return [left, top, right, top, right, bottom, left, bottom]

    ocr_lines: list[Line] = [
        Line(text=text, bounding_box=_quad(top)) for text, top in text_and_top
    ]
    page = Page(page_no=1, width=page_width, height=page_height, lines=ocr_lines)
    details = OCRDetails(
        text="\n".join(text for text, _ in text_and_top),
        pages=[page],
    )
    return OCRResult(result=details, meta_data={"engine": "fake"})
|
||||
|
||||
|
||||
class _WrappedResponse(BaseModel):
    """Test-local mirror of the ProvenanceWrappedResponse that GenAIStep creates at runtime.

    Bundles the extracted header with the segment citations for its fields.
    """

    # Extracted bank-statement header.
    result: BankStatementHeader
    # Citations for the header's fields; an empty list when none are given.
    segment_citations: list[SegmentCitation] = []
|
||||
|
||||
|
||||
def _canned_llm_output() -> _WrappedResponse:
    """Return the canned wrapped response used to seed the fake GenAI client.

    The header carries the EXPECTED_* ground-truth values plus
    ``account_type="checking"``; each header field gets one citation that
    points at the OCR segment(s) it is taken from.
    """
    # After OCRStep injects <page> tag lines, the real OCR line at local
    # index 0 gets segment id p1_l0 (tag lines are skipped by
    # SegmentIndex.build). So:
    #   p1_l0 -> "DKB"
    #   p1_l1 -> "IBAN: DE89370400440532013000"
    #   p1_l2 -> "Statement period: 01.03.2026 - 31.03.2026"
    #   p1_l3 -> "Opening balance: 1234.56 EUR"
    #   p1_l4 -> "Closing balance: 1450.22 EUR"
    #   p1_l5 -> "Statement date: 31.03.2026"
    header = BankStatementHeader(
        bank_name=EXPECTED_BANK_NAME,
        account_iban=EXPECTED_IBAN,
        account_type="checking",
        currency=EXPECTED_CURRENCY,
        statement_date=EXPECTED_STATEMENT_DATE,
        statement_period_start=EXPECTED_PERIOD_START,
        statement_period_end=EXPECTED_PERIOD_END,
        opening_balance=EXPECTED_OPENING,
        closing_balance=EXPECTED_CLOSING,
    )

    # One row per cited field: (field path, value segment ids, context segment ids).
    # account_type has no value segment of its own and cites p1_l0 as context only.
    citation_rows: list[tuple[str, list[str], list[str]]] = [
        ("result.bank_name", ["p1_l0"], []),
        ("result.account_iban", ["p1_l1"], []),
        ("result.account_type", [], ["p1_l0"]),
        ("result.currency", ["p1_l3", "p1_l4"], []),
        ("result.statement_date", ["p1_l5"], []),
        ("result.statement_period_start", ["p1_l2"], []),
        ("result.statement_period_end", ["p1_l2"], []),
        ("result.opening_balance", ["p1_l3"], []),
        ("result.closing_balance", ["p1_l4"], []),
    ]
    citations = [
        SegmentCitation(
            field_path=path,
            value_segment_ids=value_ids,
            context_segment_ids=context_ids,
        )
        for path, value_ids, context_ids in citation_rows
    ]
    return _WrappedResponse(result=header, segment_citations=citations)
|
||||
|
||||
|
||||
def _build_pipeline(fetch_config: Any = None) -> Pipeline:
    """Assemble the five-step pipeline around the fake OCR and GenAI clients.

    The fake OCR client is seeded with ``_canned_ocr_result()`` and the fake
    GenAI client with ``_canned_llm_output()``. The returned pipeline runs
    SetupStep, OCRStep, GenAIStep, ReliabilityStep and ResponseHandlerStep in
    that order.

    ``fetch_config`` is forwarded to ``SetupStep`` as its ``fetch_config``
    argument; when it is ``None`` (the default) ``SetupStep`` is constructed
    without arguments.
    """
    ocr_client = FakeOCRClient(canned=_canned_ocr_result())
    genai_client = FakeGenAIClient(
        parsed=_canned_llm_output(),
        usage=GenAIUsage(prompt_tokens=200, completion_tokens=400),
        model_name="fake-gpt",
    )
    # Test against None rather than truthiness: a caller-supplied config that
    # happens to be falsy must still reach SetupStep instead of being dropped.
    if fetch_config is None:
        setup = SetupStep()
    else:
        setup = SetupStep(fetch_config=fetch_config)
    return Pipeline(
        steps=[
            setup,
            OCRStep(ocr_client=ocr_client),
            GenAIStep(genai_client=genai_client),
            ReliabilityStep(),
            ResponseHandlerStep(),
        ]
    )
|
||||
|
||||
|
||||
class TestEndToEnd:
    """Run the full pipeline on the committed fixture with the fake clients."""

    @pytest.fixture
    def request_ix(self) -> RequestIX:
        """Build the canonical single-file request used by every test here."""
        # Canonical single-file request pointing to the committed fixture
        # via file:// URL. Also includes a matching Paperless-style text
        # so text_agreement has real data to compare against.
        paperless_text = (
            "DKB statement. IBAN: DE89370400440532013000. Period 01.03.2026 - "
            "31.03.2026. Opening balance 1234.56 EUR. Closing balance 1450.22 EUR. "
            "Date 31.03.2026."
        )
        return RequestIX(
            use_case="bank_statement_header",
            ix_client_id="mammon-test",
            request_id="end-to-end-1",
            ix_id="abcd0123ef456789",
            context=Context(
                files=[FIXTURE_PDF.as_uri()],
                texts=[paperless_text],
            ),
            options=Options(
                ocr=OCROptions(use_ocr=True),
                provenance=ProvenanceOptions(
                    include_provenance=True, max_sources_per_field=5
                ),
            ),
        )

    async def test_ix_result_populated_from_fake_llm(self, request_ix: RequestIX) -> None:
        """The result dict carries the values from the canned LLM output."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)
        assert response.error is None
        result = response.ix_result.result
        assert result["bank_name"] == EXPECTED_BANK_NAME
        assert result["account_iban"] == EXPECTED_IBAN
        assert result["currency"] == EXPECTED_CURRENCY
        # Pydantic v2 dumps Decimals as strings in mode="json".
        assert result["closing_balance"] == str(EXPECTED_CLOSING)

    async def test_provenance_verified_for_closing_balance(
        self, request_ix: RequestIX
    ) -> None:
        """The closing-balance field is reported as provenance-verified."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)
        assert response.provenance is not None
        fp = response.provenance.fields["result.closing_balance"]
        assert fp.provenance_verified is True

    async def test_text_agreement_true_when_texts_match_value(
        self, request_ix: RequestIX
    ) -> None:
        """The closing-balance field reports text agreement with the request text."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)
        assert response.provenance is not None
        fp = response.provenance.fields["result.closing_balance"]
        assert fp.text_agreement is True

    async def test_timings_per_step(self, request_ix: RequestIX) -> None:
        """Timings list one entry per step, in pipeline order, with float durations."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)
        # Each of the five steps executed and recorded a timing.
        names = [t["step"] for t in response.metadata.timings]
        assert names == [
            "SetupStep",
            "OCRStep",
            "GenAIStep",
            "ReliabilityStep",
            "ResponseHandlerStep",
        ]
        for entry in response.metadata.timings:
            assert isinstance(entry["elapsed_seconds"], float)

    async def test_no_error_and_context_stripped(self, request_ix: RequestIX) -> None:
        """The response has no error and its dump contains no ``context`` key."""
        pipeline = _build_pipeline()
        response = await pipeline.start(request_ix)
        assert response.error is None
        dump = response.model_dump()
        assert "context" not in dump
|
||||
Loading…
Reference in a new issue