Thin wrapper around ix.provenance.apply_reliability_flags. Validate skips entirely when include_provenance is off OR when no provenance data was built (text-only request, etc.). Process reads context.texts + context.use_case_response and lets the verifier mutate the FieldProvenance entries + fill quality_metrics counters in place. 11 unit tests in tests/unit/test_reliability_step.py cover: validate skips on flag off / missing provenance, runs otherwise; per-type flag behaviour (string verified + text_agreement, Literal -> None, None value -> None, short numeric -> text_agreement None, date with both sides parsed, IBAN whitespace-insensitive, disagreement -> False); quality_metrics verified_fields / text_agreement_fields counters. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
250 lines
8.6 KiB
Python
250 lines
8.6 KiB
Python
"""Tests for :class:`ix.pipeline.reliability_step.ReliabilityStep` (spec §6)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import date
|
|
from decimal import Decimal
|
|
|
|
from ix.contracts import (
|
|
BoundingBox,
|
|
Context,
|
|
ExtractionSource,
|
|
FieldProvenance,
|
|
OCROptions,
|
|
Options,
|
|
ProvenanceData,
|
|
ProvenanceOptions,
|
|
RequestIX,
|
|
ResponseIX,
|
|
)
|
|
from ix.contracts.response import _InternalContext
|
|
from ix.pipeline.reliability_step import ReliabilityStep
|
|
from ix.use_cases.bank_statement_header import BankStatementHeader
|
|
|
|
|
|
def _src(
    segment_id: str,
    text: str,
    page: int = 1,
    bbox: list[float] | None = None,
) -> ExtractionSource:
    """Build an ``ExtractionSource`` fixture citing a single segment.

    Args:
        segment_id: Identifier stored on the source as ``segment_id``.
        text: Snippet stored on the source as ``text_snippet``.
        page: Page number stored as ``page_number`` (defaults to 1).
        bbox: Coordinates for the bounding box; a falsy value (``None`` or
            an empty list) is replaced by a default eight-value list.

    Returns:
        A source with ``file_index=0`` and ``relevance_score=1.0``.
    """
    # ``or`` (not ``is None``) is deliberate parity with the fixture's
    # behaviour: any falsy bbox falls back to the default coordinates.
    coordinates = bbox or [0, 0, 1, 0, 1, 1, 0, 1]
    box = BoundingBox(coordinates=coordinates)
    return ExtractionSource(
        page_number=page,
        file_index=0,
        bounding_box=box,
        text_snippet=text,
        relevance_score=1.0,
        segment_id=segment_id,
    )
|
|
|
|
|
|
def _make_request(
    include_provenance: bool = True, texts: list[str] | None = None
) -> RequestIX:
    """Build a minimal ``RequestIX`` for the bank-statement-header use case.

    Args:
        include_provenance: Forwarded to
            ``ProvenanceOptions(include_provenance=...)``.
        texts: Texts placed on the request context; a falsy value becomes
            an empty list.

    Returns:
        A request with no files, default ``OCROptions`` and fixed
        client/request identifiers.
    """
    request_context = Context(files=[], texts=texts or [])
    request_options = Options(
        ocr=OCROptions(),
        provenance=ProvenanceOptions(include_provenance=include_provenance),
    )
    return RequestIX(
        use_case="bank_statement_header",
        ix_client_id="test",
        request_id="r-1",
        context=request_context,
        options=request_options,
    )
|
|
|
|
|
|
def _response_with_provenance(
    fields: dict[str, FieldProvenance],
    texts: list[str] | None = None,
) -> ResponseIX:
    """Build a ``ResponseIX`` that already carries provenance data.

    Args:
        fields: Mapping of field path to ``FieldProvenance`` stored on
            ``ProvenanceData.fields``.
        texts: Texts placed on the internal context; a falsy value becomes
            an empty list.

    Returns:
        A response whose ``provenance`` has empty ``quality_metrics`` and
        whose ``context`` uses ``BankStatementHeader`` as
        ``use_case_response``.
    """
    response = ResponseIX()

    # Attributes are assigned after construction (not passed to the
    # constructor), mirroring how this fixture has always been assembled.
    provenance = ProvenanceData(
        fields=fields,
        quality_metrics={},
        segment_count=10,
        granularity="line",
    )
    response.provenance = provenance

    internal_context = _InternalContext(
        texts=texts or [],
        use_case_response=BankStatementHeader,
    )
    response.context = internal_context

    return response
|
|
|
|
|
|
class TestValidate:
    """Gating behaviour of ``ReliabilityStep.validate``."""

    async def test_skipped_when_provenance_off(self) -> None:
        """``include_provenance=False`` makes validate return ``False``."""
        reliability = ReliabilityStep()
        request = _make_request(include_provenance=False)
        response = _response_with_provenance(fields={})

        should_run = await reliability.validate(request, response)

        assert should_run is False

    async def test_skipped_when_no_provenance_data(self) -> None:
        """A bare ``ResponseIX()`` makes validate return ``False``."""
        reliability = ReliabilityStep()
        request = _make_request(include_provenance=True)
        response = ResponseIX()

        should_run = await reliability.validate(request, response)

        assert should_run is False

    async def test_runs_when_provenance_data_present(self) -> None:
        """Flag on plus provenance data present makes validate return ``True``."""
        reliability = ReliabilityStep()
        request = _make_request(include_provenance=True)
        response = _response_with_provenance(fields={})

        should_run = await reliability.validate(request, response)

        assert should_run is True
|
|
|
|
|
|
class TestProcessFlags:
    """Per-field flags written by ``ReliabilityStep.process``.

    Every test builds one ``FieldProvenance``, runs the step, and checks
    ``provenance_verified`` / ``text_agreement`` on that same entry.
    """

    async def test_string_field_verified_and_text_agreement(self) -> None:
        """Value present in both the cited snippet and the text: both ``True``."""
        path = "result.bank_name"
        text = "DKB statement content"
        entry = FieldProvenance(
            field_name="bank_name",
            field_path=path,
            value="DKB",
            sources=[_src("p1_l0", "DKB")],
        )
        response = _response_with_provenance(fields={path: entry}, texts=[text])
        request = _make_request(texts=[text])

        result = await ReliabilityStep().process(request, response)

        flags = result.provenance.fields[path]
        assert flags.provenance_verified is True
        assert flags.text_agreement is True

    async def test_literal_field_flags_none(self) -> None:
        """``account_type`` (presumably Literal-typed) leaves both flags ``None``."""
        path = "result.account_type"
        text = "some text"
        entry = FieldProvenance(
            field_name="account_type",
            field_path=path,
            value="checking",
            sources=[_src("p1_l0", "anything")],
        )
        response = _response_with_provenance(fields={path: entry}, texts=[text])
        request = _make_request(texts=[text])

        result = await ReliabilityStep().process(request, response)

        flags = result.provenance.fields[path]
        assert flags.provenance_verified is None
        assert flags.text_agreement is None

    async def test_none_value_flags_none(self) -> None:
        """A ``None`` field value leaves both flags ``None``."""
        path = "result.account_iban"
        text = "text"
        entry = FieldProvenance(
            field_name="account_iban",
            field_path=path,
            value=None,
            sources=[_src("p1_l0", "IBAN blah")],
        )
        response = _response_with_provenance(fields={path: entry}, texts=[text])
        request = _make_request(texts=[text])

        result = await ReliabilityStep().process(request, response)

        flags = result.provenance.fields[path]
        assert flags.provenance_verified is None
        assert flags.text_agreement is None

    async def test_short_value_text_agreement_skipped(self) -> None:
        """A small numeric value is verified but gets no text-agreement flag."""
        # Opening balance value < 10 → short numeric skip rule.
        path = "result.opening_balance"
        text = "balance 5.00"
        entry = FieldProvenance(
            field_name="opening_balance",
            field_path=path,
            value=Decimal("5.00"),
            sources=[_src("p1_l0", "balance 5.00")],
        )
        response = _response_with_provenance(fields={path: entry}, texts=[text])
        request = _make_request(texts=[text])

        result = await ReliabilityStep().process(request, response)

        flags = result.provenance.fields[path]
        # The snippet check against the cited source still runs ...
        assert flags.provenance_verified is True
        # ... while the text comparison is skipped for short values.
        assert flags.text_agreement is None

    async def test_date_field_parses_both_sides(self) -> None:
        """Dates match across formats: ``31.03.2026`` snippet, ISO text."""
        path = "result.statement_date"
        text = "Statement date 2026-03-31"
        entry = FieldProvenance(
            field_name="statement_date",
            field_path=path,
            value=date(2026, 3, 31),
            sources=[_src("p1_l0", "Statement date 31.03.2026")],
        )
        response = _response_with_provenance(fields={path: entry}, texts=[text])
        request = _make_request(texts=[text])

        result = await ReliabilityStep().process(request, response)

        flags = result.provenance.fields[path]
        assert flags.provenance_verified is True
        assert flags.text_agreement is True

    async def test_iban_field_whitespace_ignored(self) -> None:
        """A compact IBAN value matches a space-grouped IBAN in snippet and text."""
        path = "result.account_iban"
        text = "IBAN DE89 3704 0044 0532 0130 00"
        entry = FieldProvenance(
            field_name="account_iban",
            field_path=path,
            value="DE89370400440532013000",
            sources=[_src("p1_l0", "IBAN DE89 3704 0044 0532 0130 00")],
        )
        response = _response_with_provenance(fields={path: entry}, texts=[text])
        request = _make_request(texts=[text])

        result = await ReliabilityStep().process(request, response)

        flags = result.provenance.fields[path]
        assert flags.provenance_verified is True
        assert flags.text_agreement is True

    async def test_disagreeing_snippet_sets_false(self) -> None:
        """Value absent from both snippet and text: both flags ``False``."""
        path = "result.bank_name"
        text = "Commerzbank header"
        entry = FieldProvenance(
            field_name="bank_name",
            field_path=path,
            value="DKB",
            sources=[_src("p1_l0", "Commerzbank")],
        )
        response = _response_with_provenance(fields={path: entry}, texts=[text])
        request = _make_request(texts=[text])

        result = await ReliabilityStep().process(request, response)

        flags = result.provenance.fields[path]
        assert flags.provenance_verified is False
        assert flags.text_agreement is False
|
|
|
|
|
|
class TestCounters:
    """``quality_metrics`` counters written by ``ReliabilityStep.process``."""

    async def test_quality_metrics_counters_written(self) -> None:
        """Only the one agreeing field is counted in each counter."""
        text = "DKB statement"

        # Snippet and text both contain the value.
        agreeing = FieldProvenance(
            field_name="bank_name",
            field_path="result.bank_name",
            value="DKB",
            sources=[_src("p1_l0", "DKB")],
        )
        # Neither the snippet nor the text contains "EUR".
        disagreeing = FieldProvenance(
            field_name="currency",
            field_path="result.currency",
            value="EUR",
            sources=[_src("p1_l1", "nothing to see")],
        )
        # Same field/value pair that the per-field tests expect to stay unflagged.
        literal = FieldProvenance(
            field_name="account_type",
            field_path="result.account_type",
            value="checking",
            sources=[_src("p1_l2", "anything")],
        )
        provenance_by_path = {
            "result.bank_name": agreeing,
            "result.currency": disagreeing,
            "result.account_type": literal,
        }
        response = _response_with_provenance(
            fields=provenance_by_path,
            texts=[text],
        )
        request = _make_request(texts=[text])

        result = await ReliabilityStep().process(request, response)

        metrics = result.provenance.quality_metrics
        # bank_name is the only entry that is both verified and in agreement,
        # so each counter is expected to be exactly 1.
        assert metrics["verified_fields"] == 1
        assert metrics["text_agreement_fields"] == 1
|