Lands the two remaining provenance-subsystem pieces:
mapper.py — map_segment_refs_to_provenance:
- For each LLM SegmentCitation, pick seg-ids per source_type
(`value` vs `value_and_context`), cap at max_sources_per_field,
resolve each via SegmentIndex, track invalid references.
- Resolve field values by dot-path (`result.items[0].name` supported —
`[N]` bracket notation is normalised to `.N` before traversal).
- Skip fields that resolve to zero valid sources (spec §9.4).
- Write quality_metrics with fields_with_provenance / total_fields /
coverage_rate / invalid_references.
verify.py — verify_field + apply_reliability_flags:
- Dispatches per Pydantic field type: date → parse-both-sides compare;
int/float/Decimal → normalize + whole-snippet / numeric-token scan;
IBAN (detected via `iban` in field name) → upper+strip compare;
Literal / None → flags stay None; else string substring.
- _unwrap_optional handles BOTH typing.Union AND types.UnionType so
`Decimal | None` (PEP 604, what get_type_hints emits on 3.12+) resolves
correctly — caught by the integration-style test_writes_flags_and_counters.
- Number comparator scans numeric tokens in the snippet so labels
("Closing balance CHF 1'234.56") don't mask the match.
- apply_reliability_flags mutates the passed ProvenanceData in place and
writes verified_fields / text_agreement_fields to quality_metrics.
Tests cover each comparator, Literal/None skip, short-value skip (strings
and numerics), Decimal via optional union, and end-to-end flag+counter
writing against a Pydantic use-case schema that mirrors bank_statement_header.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
220 lines
7.2 KiB
Python
220 lines
7.2 KiB
Python
"""Tests for the reliability verifier (spec §6 ReliabilityStep)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
from datetime import date
|
|
from decimal import Decimal
|
|
from typing import Literal
|
|
|
|
from pydantic import BaseModel
|
|
|
|
from ix.contracts import (
|
|
ExtractionSource,
|
|
FieldProvenance,
|
|
ProvenanceData,
|
|
)
|
|
from ix.provenance.verify import apply_reliability_flags, verify_field
|
|
|
|
|
|
def _make_fp(
    *,
    field_path: str,
    value: object,
    snippets: list[str],
) -> FieldProvenance:
    """Build a ``FieldProvenance`` fixture with one source per snippet.

    Args:
        field_path: Dotted path of the field; its last component becomes
            ``field_name``.
        value: The extracted value stored on the provenance entry.
        snippets: Text snippets; each becomes one ``ExtractionSource``.

    Returns:
        A ``FieldProvenance`` whose sources all sit on page 1 of file 0 with
        relevance 1.0 and segment ids ``p1_l<i>`` (``i`` is the snippet's
        position in ``snippets``).
    """
    # The last dot-separated component is the bare field name.
    *_parents, leaf_name = field_path.split(".")

    sources = []
    for position, snippet in enumerate(snippets):
        sources.append(
            ExtractionSource(
                page_number=1,
                file_index=0,
                text_snippet=snippet,
                relevance_score=1.0,
                segment_id=f"p1_l{position}",
            )
        )

    return FieldProvenance(
        field_name=leaf_name,
        field_path=field_path,
        value=value,
        sources=sources,
    )
|
|
|
|
|
|
class TestVerifyFieldByType:
    """Checks ``verify_field`` results for each declared field type."""

    def test_string_substring_match(self) -> None:
        """A string value contained in a snippet is verified; no texts → no agreement flag."""
        field = _make_fp(
            field_path="result.bank_name",
            value="UBS AG",
            snippets=["Account at UBS AG, Zurich"],
        )
        verified, agreement = verify_field(field, str, texts=[])
        assert verified is True
        assert agreement is None

    def test_string_mismatch_is_false(self) -> None:
        """A string value absent from every snippet is reported as not verified."""
        field = _make_fp(
            field_path="result.bank_name",
            value="UBS AG",
            snippets=["Credit Suisse"],
        )
        verified, _agreement = verify_field(field, str, texts=[])
        assert verified is False

    def test_number_decimal_match_ignores_currency(self) -> None:
        """A Decimal matches a snippet that carries a currency label and a thousands mark."""
        field = _make_fp(
            field_path="result.closing_balance",
            value=Decimal("1234.56"),
            snippets=["CHF 1'234.56"],
        )
        verified, _agreement = verify_field(field, Decimal, texts=[])
        assert verified is True

    def test_number_mismatch(self) -> None:
        """A Decimal that differs from the number in the snippet is not verified."""
        field = _make_fp(
            field_path="result.closing_balance",
            value=Decimal("1234.56"),
            snippets=["CHF 9999.99"],
        )
        verified, _agreement = verify_field(field, Decimal, texts=[])
        assert verified is False

    def test_date_parse_both_sides(self) -> None:
        """A date value matches a snippet that writes the same day as ``31.03.2026``."""
        field = _make_fp(
            field_path="result.statement_date",
            value=date(2026, 3, 31),
            snippets=["Statement date: 31.03.2026"],
        )
        verified, _agreement = verify_field(field, date, texts=[])
        assert verified is True

    def test_iban_strip_and_case(self) -> None:
        """An IBAN value matches a snippet that prints it in space-separated groups."""
        # The field is treated as an IBAN because its name contains "iban".
        field = _make_fp(
            field_path="result.account_iban",
            value="CH9300762011623852957",
            snippets=["Account CH93 0076 2011 6238 5295 7"],
        )
        verified, _agreement = verify_field(field, str, texts=[])
        assert verified is True

    def test_literal_field_both_flags_none(self) -> None:
        """For a Literal-typed field both flags stay ``None`` even when the text matches."""
        field = _make_fp(
            field_path="result.account_type",
            value="checking",
            snippets=["the word checking is literally here"],
        )
        verified, agreement = verify_field(
            field,
            Literal["checking", "credit", "savings"],
            texts=["checking"],
        )
        assert verified is None
        assert agreement is None

    def test_none_value_both_flags_none(self) -> None:
        """For a ``None`` value both flags stay ``None``."""
        field = _make_fp(
            field_path="result.account_iban",
            value=None,
            snippets=["whatever"],
        )
        verified, agreement = verify_field(field, str, texts=["whatever"])
        assert verified is None
        assert agreement is None
|
|
|
|
|
|
class TestTextAgreement:
    """Checks the ``text_agreement`` flag returned by ``verify_field``."""

    @staticmethod
    def _bank_name_field() -> FieldProvenance:
        """Return a fresh ``result.bank_name`` fixture whose only snippet is the value itself."""
        return _make_fp(
            field_path="result.bank_name",
            value="UBS AG",
            snippets=["UBS AG"],
        )

    def test_text_agreement_with_texts_true(self) -> None:
        """The flag is True when a supplied text contains the value."""
        _verified, agreement = verify_field(
            self._bank_name_field(), str, texts=["Account at UBS AG"]
        )
        assert agreement is True

    def test_text_agreement_with_texts_false(self) -> None:
        """The flag is False when the supplied texts do not contain the value."""
        _verified, agreement = verify_field(
            self._bank_name_field(), str, texts=["Credit Suisse"]
        )
        assert agreement is False

    def test_text_agreement_no_texts_is_none(self) -> None:
        """The flag is None when no texts are supplied."""
        _verified, agreement = verify_field(self._bank_name_field(), str, texts=[])
        assert agreement is None

    def test_short_value_skips_text_agreement(self) -> None:
        """A two-character string is still verified but gets no agreement flag."""
        field = _make_fp(
            field_path="result.code",
            value="XY",  # 2-char string
            snippets=["code XY here"],
        )
        verified, agreement = verify_field(field, str, texts=["another XY reference"])
        # provenance_verified is still computed; only text_agreement is skipped.
        assert verified is True
        assert agreement is None

    def test_small_number_skips_text_agreement(self) -> None:
        """A single-digit integer is still verified but gets no agreement flag."""
        field = _make_fp(
            field_path="result.n",
            value=5,
            snippets=["value 5 here"],
        )
        verified, agreement = verify_field(field, int, texts=["the number 5"])
        assert verified is True
        assert agreement is None
|
|
|
|
|
|
class TestApplyReliabilityFlags:
    """End-to-end check of ``apply_reliability_flags`` against a small schema."""

    def test_writes_flags_and_counters(self) -> None:
        """Flags are written onto each field and the counters land in quality_metrics."""

        class BankHeader(BaseModel):
            bank_name: str
            account_iban: str | None = None
            closing_balance: Decimal | None = None
            account_type: Literal["checking", "credit", "savings"] | None = None

        # (field path, extracted value, single source snippet)
        cases = (
            ("result.bank_name", "UBS AG", "Account at UBS AG"),
            (
                "result.account_iban",
                "CH9300762011623852957",
                "IBAN CH93 0076 2011 6238 5295 7",
            ),
            (
                "result.closing_balance",
                Decimal("1234.56"),
                "Closing balance CHF 1'234.56",
            ),
            ("result.account_type", "checking", "current account (checking)"),
        )
        provenance = ProvenanceData(
            fields={
                path: _make_fp(field_path=path, value=extracted, snippets=[snippet])
                for path, extracted, snippet in cases
            },
        )

        # The call mutates ``provenance``; the assertions below read the flags back from it.
        apply_reliability_flags(
            provenance,
            BankHeader,
            texts=["Account at UBS AG at CH9300762011623852957"],
        )

        by_path = provenance.fields

        bank_name = by_path["result.bank_name"]
        assert bank_name.provenance_verified is True
        assert bank_name.text_agreement is True

        assert by_path["result.account_iban"].provenance_verified is True
        assert by_path["result.closing_balance"].provenance_verified is True

        # account_type is a Literal, so both of its flags remain None.
        account_type = by_path["result.account_type"]
        assert account_type.provenance_verified is None
        assert account_type.text_agreement is None

        # The counters only count flags that are True.
        metrics = provenance.quality_metrics
        assert metrics["verified_fields"] == 3  # every field except the Literal

        # text_agreement_fields: bank_name is True; the IBAN is expected to be
        # True as well (it appears in texts after normalisation); '1234.56'
        # does not occur in the text. Only the lower bound is asserted here.
        assert metrics["text_agreement_fields"] >= 1
|