infoxtractor/tests/unit/test_provenance_verify.py
Dirk Riemann 1e340c82fa
All checks were successful
tests / test (pull_request) Successful in 1m10s
tests / test (push) Successful in 1m11s
feat(provenance): mapper + verifier for ReliabilityStep (spec §9.4, §6)
Lands the two remaining provenance-subsystem pieces:

mapper.py — map_segment_refs_to_provenance:
- For each LLM SegmentCitation, pick seg-ids per source_type
  (`value` vs `value_and_context`), cap at max_sources_per_field,
  resolve each via SegmentIndex, track invalid references.
- Resolve field values by dot-path (`result.items[0].name` supported —
  `[N]` bracket notation is normalised to `.N` before traversal).
- Skip fields that resolve to zero valid sources (spec §9.4).
- Write quality_metrics with fields_with_provenance / total_fields /
  coverage_rate / invalid_references.

verify.py — verify_field + apply_reliability_flags:
- Dispatches per Pydantic field type: date → parse-both-sides compare;
  int/float/Decimal → normalize + whole-snippet / numeric-token scan;
  IBAN (detected via `iban` in field name) → upper+strip compare;
  Literal / None → flags stay None; else string substring.
- _unwrap_optional handles BOTH typing.Union AND types.UnionType so
  `Decimal | None` (PEP 604, what get_type_hints emits on 3.12+) resolves
  correctly — caught by the integration-style test_writes_flags_and_counters.
- Number comparator scans numeric tokens in the snippet so labels
  ("Closing balance CHF 1'234.56") don't mask the match.
- apply_reliability_flags mutates the passed ProvenanceData in place and
  writes verified_fields / text_agreement_fields to quality_metrics.

Tests cover each comparator, Literal/None skip, short-value skip (strings
and numerics), Decimal via optional union, and end-to-end flag+counter
writing against a Pydantic use-case schema that mirrors bank_statement_header.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 11:01:19 +02:00

220 lines
7.2 KiB
Python

"""Tests for the reliability verifier (spec §6 ReliabilityStep)."""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Literal
from pydantic import BaseModel
from ix.contracts import (
ExtractionSource,
FieldProvenance,
ProvenanceData,
)
from ix.provenance.verify import apply_reliability_flags, verify_field
def _make_fp(
    *,
    field_path: str,
    value: object,
    snippets: list[str],
) -> FieldProvenance:
    """Build a ``FieldProvenance`` fixture with one source per snippet.

    Args:
        field_path: Dotted path of the field; its last dot-separated
            component is used as ``field_name``.
        value: The extracted value stored on the provenance record.
        snippets: Text snippets; each becomes one ``ExtractionSource`` on
            page 1 of file 0 with relevance 1.0 and segment id
            ``p1_l<index>``.

    Returns:
        The assembled ``FieldProvenance``.
    """
    # Everything after the final dot (the whole path when there is no dot).
    leaf_name = field_path.rpartition(".")[2]

    sources = []
    for line_index, snippet in enumerate(snippets):
        sources.append(
            ExtractionSource(
                page_number=1,
                file_index=0,
                text_snippet=snippet,
                relevance_score=1.0,
                segment_id=f"p1_l{line_index}",
            )
        )

    return FieldProvenance(
        field_name=leaf_name,
        field_path=field_path,
        value=value,
        sources=sources,
    )
class TestVerifyFieldByType:
    """Checks the ``provenance_verified`` result of ``verify_field`` per field type."""

    def test_string_substring_match(self) -> None:
        """A string value found inside a snippet verifies; with no texts the agreement flag is None."""
        provenance = _make_fp(
            field_path="result.bank_name",
            value="UBS AG",
            snippets=["Account at UBS AG, Zurich"],
        )

        verified, agreement = verify_field(provenance, str, texts=[])

        assert verified is True
        assert agreement is None

    def test_string_mismatch_is_false(self) -> None:
        """A string value absent from every snippet yields ``False``."""
        provenance = _make_fp(
            field_path="result.bank_name",
            value="UBS AG",
            snippets=["Credit Suisse"],
        )

        verified, _ = verify_field(provenance, str, texts=[])

        assert verified is False

    def test_number_decimal_match_ignores_currency(self) -> None:
        """A Decimal matches a snippet carrying a currency label and an apostrophe separator."""
        provenance = _make_fp(
            field_path="result.closing_balance",
            value=Decimal("1234.56"),
            snippets=["CHF 1'234.56"],
        )

        verified, _ = verify_field(provenance, Decimal, texts=[])

        assert verified is True

    def test_number_mismatch(self) -> None:
        """A Decimal does not verify against a snippet holding a different amount."""
        provenance = _make_fp(
            field_path="result.closing_balance",
            value=Decimal("1234.56"),
            snippets=["CHF 9999.99"],
        )

        verified, _ = verify_field(provenance, Decimal, texts=[])

        assert verified is False

    def test_date_parse_both_sides(self) -> None:
        """A ``date`` value verifies against a ``DD.MM.YYYY`` rendering inside the snippet."""
        provenance = _make_fp(
            field_path="result.statement_date",
            value=date(2026, 3, 31),
            snippets=["Statement date: 31.03.2026"],
        )

        verified, _ = verify_field(provenance, date, texts=[])

        assert verified is True

    def test_iban_strip_and_case(self) -> None:
        """An IBAN value verifies against a snippet that prints it in space-separated groups."""
        # IBAN detection: field name contains "iban".
        provenance = _make_fp(
            field_path="result.account_iban",
            value="CH9300762011623852957",
            snippets=["Account CH93 0076 2011 6238 5295 7"],
        )

        verified, _ = verify_field(provenance, str, texts=[])

        assert verified is True

    def test_literal_field_both_flags_none(self) -> None:
        """A ``Literal`` field type leaves both flags ``None`` even though the word is present."""
        provenance = _make_fp(
            field_path="result.account_type",
            value="checking",
            snippets=["the word checking is literally here"],
        )
        account_kind = Literal["checking", "credit", "savings"]

        verified, agreement = verify_field(provenance, account_kind, texts=["checking"])

        assert verified is None
        assert agreement is None

    def test_none_value_both_flags_none(self) -> None:
        """A ``None`` value leaves both flags ``None``."""
        provenance = _make_fp(
            field_path="result.account_iban",
            value=None,
            snippets=["whatever"],
        )

        verified, agreement = verify_field(provenance, str, texts=["whatever"])

        assert verified is None
        assert agreement is None
class TestTextAgreement:
    """Checks the ``text_agreement`` half of the ``verify_field`` result."""

    def test_text_agreement_with_texts_true(self) -> None:
        """The value appearing in the supplied texts gives ``True``."""
        provenance = _make_fp(
            field_path="result.bank_name",
            value="UBS AG",
            snippets=["UBS AG"],
        )

        _, agreement = verify_field(provenance, str, texts=["Account at UBS AG"])

        assert agreement is True

    def test_text_agreement_with_texts_false(self) -> None:
        """The value missing from the supplied texts gives ``False``."""
        provenance = _make_fp(
            field_path="result.bank_name",
            value="UBS AG",
            snippets=["UBS AG"],
        )

        _, agreement = verify_field(provenance, str, texts=["Credit Suisse"])

        assert agreement is False

    def test_text_agreement_no_texts_is_none(self) -> None:
        """An empty ``texts`` list gives ``None``."""
        provenance = _make_fp(
            field_path="result.bank_name",
            value="UBS AG",
            snippets=["UBS AG"],
        )

        _, agreement = verify_field(provenance, str, texts=[])

        assert agreement is None

    def test_short_value_skips_text_agreement(self) -> None:
        """A two-character string is still provenance-verified but gets no agreement flag."""
        # 2-char string
        provenance = _make_fp(
            field_path="result.code",
            value="XY",
            snippets=["code XY here"],
        )

        verified, agreement = verify_field(
            provenance, str, texts=["another XY reference"]
        )

        # provenance_verified still runs; text_agreement is skipped.
        assert verified is True
        assert agreement is None

    def test_small_number_skips_text_agreement(self) -> None:
        """A single-digit int is still provenance-verified but gets no agreement flag."""
        provenance = _make_fp(
            field_path="result.n",
            value=5,
            snippets=["value 5 here"],
        )

        verified, agreement = verify_field(provenance, int, texts=["the number 5"])

        assert verified is True
        assert agreement is None
class TestApplyReliabilityFlags:
    """End-to-end check of ``apply_reliability_flags`` against a small schema."""

    def test_writes_flags_and_counters(self) -> None:
        """Flags are set on each field and the counters land in ``quality_metrics``.

        The return value of ``apply_reliability_flags`` is not used; every
        assertion reads back from the ``ProvenanceData`` that was passed in.
        """

        class BankHeader(BaseModel):
            bank_name: str
            account_iban: str | None = None
            closing_balance: Decimal | None = None
            account_type: Literal["checking", "credit", "savings"] | None = None

        # field path -> (extracted value, the single snippet backing it)
        cases = {
            "result.bank_name": ("UBS AG", "Account at UBS AG"),
            "result.account_iban": (
                "CH9300762011623852957",
                "IBAN CH93 0076 2011 6238 5295 7",
            ),
            "result.closing_balance": (
                Decimal("1234.56"),
                "Closing balance CHF 1'234.56",
            ),
            "result.account_type": ("checking", "current account (checking)"),
        }
        prov = ProvenanceData(
            fields={
                path: _make_fp(field_path=path, value=extracted, snippets=[snippet])
                for path, (extracted, snippet) in cases.items()
            },
        )

        apply_reliability_flags(
            prov,
            BankHeader,
            texts=["Account at UBS AG at CH9300762011623852957"],
        )

        flagged = prov.fields
        bank_name = flagged["result.bank_name"]
        assert bank_name.provenance_verified is True
        assert bank_name.text_agreement is True
        assert flagged["result.account_iban"].provenance_verified is True
        assert flagged["result.closing_balance"].provenance_verified is True

        # account_type is Literal → both flags None.
        account_type = flagged["result.account_type"]
        assert account_type.provenance_verified is None
        assert account_type.text_agreement is None

        # Counters record only True values.
        metrics = prov.quality_metrics
        assert metrics["verified_fields"] == 3  # all except Literal

        # text_agreement_fields counts only fields where the flag is True.
        # Expected: bank_name True; IBAN True (appears in texts after
        # normalisation); closing_balance not, because '1234.56' doesn't
        # appear in the text. Only the lower bound is asserted here.
        assert metrics["text_agreement_fields"] >= 1