"""Tests for the provenance normalisers (spec ยง6 ReliabilityStep).""" from __future__ import annotations from datetime import date, datetime from decimal import Decimal from typing import Literal from ix.provenance.normalize import ( normalize_date, normalize_iban, normalize_number, normalize_string, should_skip_text_agreement, ) class TestNormalizeString: def test_uppercase_casefolded_and_punctuation_stripped(self) -> None: assert normalize_string(" FOO bar!!! ") == "foo bar" def test_nfkc_applied_for_fullwidth(self) -> None: # Fullwidth capital letters should NFKC-decompose to ASCII. fullwidth_ubs = "\uff35\uff22\uff33" # "UBS" in U+FF00 fullwidth block assert normalize_string(f"{fullwidth_ubs} AG") == "ubs ag" def test_whitespace_collapse(self) -> None: assert normalize_string("UBS Switzerland\tAG") == "ubs switzerland ag" def test_strips_common_punctuation(self) -> None: # Colons, commas, dots, semicolons, parens, slashes. assert normalize_string("Hello, World. (foo); bar: baz / qux") == ( "hello world foo bar baz qux" ) def test_empty_string(self) -> None: assert normalize_string("") == "" class TestNormalizeNumber: def test_chf_swiss_apostrophe_thousands(self) -> None: assert normalize_number("CHF 1'234.56") == "1234.56" def test_de_de_dot_thousands_and_comma_decimal(self) -> None: assert normalize_number("1.234,56 EUR") == "1234.56" def test_negative_sign(self) -> None: assert normalize_number("-123.45") == "-123.45" assert normalize_number("CHF -1'234.56") == "-1234.56" def test_int_input(self) -> None: assert normalize_number(42) == "42.00" def test_float_input(self) -> None: assert normalize_number(1234.5) == "1234.50" def test_decimal_input(self) -> None: assert normalize_number(Decimal("1234.56")) == "1234.56" def test_trailing_zero_is_canonicalised(self) -> None: assert normalize_number("1234.5") == "1234.50" def test_no_decimal_part(self) -> None: assert normalize_number("1234") == "1234.00" class TestNormalizeDate: def test_dayfirst_dotted(self) -> None: assert normalize_date("31.03.2026") == "2026-03-31" def test_iso_date(self) -> None: assert normalize_date("2026-03-31") == "2026-03-31" def test_date_object(self) -> None: assert normalize_date(date(2026, 3, 31)) == "2026-03-31" def test_datetime_object(self) -> None: assert normalize_date(datetime(2026, 3, 31, 10, 30)) == "2026-03-31" def test_slash_variant(self) -> None: assert normalize_date("31/03/2026") == "2026-03-31" class TestNormalizeIban: def test_uppercase_and_strip_whitespace(self) -> None: assert normalize_iban("de 89 3704 0044 0532 0130 00") == "DE89370400440532013000" def test_already_normalised(self) -> None: assert normalize_iban("CH9300762011623852957") == "CH9300762011623852957" def test_tabs_and_newlines(self) -> None: assert normalize_iban("ch 93\t0076\n2011623852957") == "CH9300762011623852957" class TestShouldSkipTextAgreement: def test_short_string_skipped(self) -> None: assert should_skip_text_agreement("AB", str) is True def test_long_string_not_skipped(self) -> None: assert should_skip_text_agreement("ABC", str) is False def test_number_abs_lt_10_skipped(self) -> None: assert should_skip_text_agreement(0, int) is True assert should_skip_text_agreement(9, int) is True assert should_skip_text_agreement(-9, int) is True assert should_skip_text_agreement(9.5, float) is True assert should_skip_text_agreement(Decimal("9.99"), Decimal) is True def test_number_abs_ge_10_not_skipped(self) -> None: assert should_skip_text_agreement(10, int) is False assert should_skip_text_agreement(-10, int) is False assert should_skip_text_agreement(Decimal("1234.56"), Decimal) is False def test_literal_type_skipped(self) -> None: lit = Literal["checking", "credit", "savings"] assert should_skip_text_agreement("checking", lit) is True def test_none_value_skipped(self) -> None: assert should_skip_text_agreement(None, str) is True assert should_skip_text_agreement(None, None) is True def test_numeric_string_treated_as_string(self) -> None: # Short stringified numeric values still trip the short-value rule. assert should_skip_text_agreement("9", str) is True