feat(use_cases): registry + bank_statement_header (spec §7)
All checks were successful
tests / test (pull_request) Successful in 1m0s
tests / test (push) Successful in 58s

First use case lands. The schema is intentionally flat — nine scalar fields,
no nested arrays — because Ollama's structured-output guidance stays most
reliable when the top level has only scalars, and every field we care about
(bank_name, IBAN, period, opening/closing balance) can be rendered as one.

Registration is explicit in `use_cases/__init__.py`, not a side effect of
importing the use-case module. That keeps load order obvious and lets tests
patch the registry without having to reload modules.

`get_use_case(name)` is the one-liner adapters use; it raises
`IX_001_001` with the offending name in `detail` when the lookup misses,
which keeps log-scrape simple.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dirk Riemann 2026-04-18 10:51:43 +02:00
parent 230068e484
commit b80c7952f7
4 changed files with 183 additions and 0 deletions

View file

@ -0,0 +1,41 @@
"""Use-case registry.
Adding a use case is a two-step change:
1. Write ``ix/use_cases/<name>.py`` exporting
``Request(BaseModel)`` (with ``use_case_name``, ``default_model``,
``system_prompt``) and ``<Response>(BaseModel)`` with the extraction
schema.
2. Add ``"<name>": (Request, <Response>)`` to :data:`REGISTRY` below.
No import-time side effects in use-case modules — registration is explicit
so the load order is obvious and tests can patch the registry cleanly.
"""
from __future__ import annotations
from pydantic import BaseModel
from ix.errors import IXErrorCode, IXException
from ix.use_cases.bank_statement_header import BankStatementHeader
from ix.use_cases.bank_statement_header import Request as BankStatementHeaderRequest
# Maps a use-case name to its (request model class, response model class) pair.
# Entries are listed here by hand; use-case modules do not register themselves.
REGISTRY: dict[str, tuple[type[BaseModel], type[BaseModel]]] = {
    "bank_statement_header": (BankStatementHeaderRequest, BankStatementHeader),
}
def get_use_case(name: str) -> tuple[type[BaseModel], type[BaseModel]]:
    """Return the model pair registered under *name*.

    Args:
        name: Key to look up in :data:`REGISTRY`.

    Returns:
        The ``(request class, response class)`` tuple stored for *name*.

    Raises:
        IXException: With code :attr:`IXErrorCode.IX_001_001` when *name* is
            not in the registry — the offending name is passed as ``detail``
            so it shows up in logs. The original ``KeyError`` is chained as
            the cause.
    """
    try:
        entry = REGISTRY[name]
    except KeyError as lookup_error:
        raise IXException(IXErrorCode.IX_001_001, detail=name) from lookup_error
    else:
        return entry
# Names exported by ``from ix.use_cases import *``: the registry and its lookup helper.
__all__ = ["REGISTRY", "get_use_case"]

View file

@ -0,0 +1,53 @@
"""`bank_statement_header` — first (and, for MVP, only) use case.
Shape mirrors spec §7. The module defines the pair of Pydantic models
(``Request`` = prompt/model config, ``BankStatementHeader`` = extraction
schema) without registering itself — registration happens in
:mod:`ix.use_cases` so import-time side effects stay out.
All header fields are ``Optional`` except ``bank_name`` and ``currency``;
the spec lets every other field be null when the document doesn't show it.
The flat (no-nested-list) schema is chosen because Ollama's structured
output stays most reliable when the top level contains only scalars.
"""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from typing import Literal
from pydantic import BaseModel, ConfigDict
class Request(BaseModel):
    """Configuration for the bank-statement-header use case.

    Carries the display name, the default model identifier and the system
    prompt. Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    use_case_name: str = "Bank Statement Header"
    default_model: str = "gpt-oss:20b"

    # The prompt is assembled from adjacent string literals, one clause per
    # line; every fragment except the last carries its own trailing space so
    # the concatenation reads as continuous prose.
    system_prompt: str = (
        "You extract header metadata from a single bank or credit-card statement. "
        "Return only facts that appear in the document; "
        "leave a field null if uncertain. "
        "Balances must use the document's numeric format "
        "(e.g. '1234.56' or '-123.45'); "
        "do not invent a currency symbol. "
        "Account type: 'checking' for current/Giro accounts, "
        "'credit' for credit-card statements, "
        "'savings' otherwise. "
        "Always return the IBAN with spaces removed. "
        "Never fabricate a value to fill a required-looking field."
    )
class BankStatementHeader(BaseModel):
    """Extraction schema for the bank-statement header fields.

    Flat model of nine scalar fields. ``bank_name`` and ``currency`` have no
    default and are therefore required; every other field defaults to
    ``None``. Unknown fields are rejected (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid")

    # Required: name of the issuing bank.
    bank_name: str
    # Optional IBAN; the use case's system prompt asks for it without spaces,
    # but no format validation is applied here.
    account_iban: str | None = None
    # Optional account category, restricted to the three listed literals.
    account_type: Literal["checking", "credit", "savings"] | None = None
    # Required: statement currency as a plain string (no validation applied;
    # presumably an ISO 4217 code such as "CHF" — TODO confirm against spec §7).
    currency: str
    # Optional dates: statement date and the start/end of the covered period.
    statement_date: date | None = None
    statement_period_start: date | None = None
    statement_period_end: date | None = None
    # Optional balances, kept as Decimal.
    opening_balance: Decimal | None = None
    closing_balance: Decimal | None = None

View file

@ -0,0 +1,54 @@
"""Tests for the first use case, `bank_statement_header` (spec §7)."""
from __future__ import annotations
from datetime import date
from decimal import Decimal
from ix.use_cases.bank_statement_header import BankStatementHeader, Request
class TestRequest:
    """Checks on the default configuration carried by :class:`Request`."""

    def test_defaults(self) -> None:
        """A no-argument ``Request`` exposes the expected name, model and prompt."""
        defaults = Request()

        assert defaults.use_case_name == "Bank Statement Header"
        assert defaults.default_model == "gpt-oss:20b"

        # Agent/worker tests rely on this substring staying stable to confirm
        # they are looking at the intended prompt.
        prompt = defaults.system_prompt
        assert "extract header metadata" in prompt
class TestBankStatementHeader:
    """Schema-level checks for :class:`BankStatementHeader`."""

    def test_all_fields_optional_except_bank_name_and_currency(self) -> None:
        """Only ``bank_name`` and ``currency`` are required; the rest default to None."""
        import pytest
        from pydantic import ValidationError

        # Minimal valid instance (per spec, only bank_name + currency are required).
        hdr = BankStatementHeader(bank_name="UBS", currency="CHF")
        assert hdr.bank_name == "UBS"
        assert hdr.currency == "CHF"

        # Every other field must default to None, not just a sample of them.
        assert hdr.account_iban is None
        assert hdr.account_type is None
        assert hdr.statement_date is None
        assert hdr.statement_period_start is None
        assert hdr.statement_period_end is None
        assert hdr.opening_balance is None
        assert hdr.closing_balance is None

        # The "except" half of the test name: omitting either required field
        # must be rejected.
        with pytest.raises(ValidationError):
            BankStatementHeader(currency="CHF")  # type: ignore[call-arg]
        with pytest.raises(ValidationError):
            BankStatementHeader(bank_name="UBS")  # type: ignore[call-arg]

    def test_full_populated_instance_roundtrip(self) -> None:
        """A fully populated header survives a JSON-mode dump and re-validation."""
        hdr = BankStatementHeader(
            bank_name="UBS Switzerland AG",
            account_iban="CH9300762011623852957",
            account_type="checking",
            currency="CHF",
            statement_date=date(2026, 3, 31),
            statement_period_start=date(2026, 3, 1),
            statement_period_end=date(2026, 3, 31),
            opening_balance=Decimal("1234.56"),
            closing_balance=Decimal("2345.67"),
        )
        dumped = hdr.model_dump(mode="json")
        rt = BankStatementHeader.model_validate(dumped)
        assert rt.account_type == "checking"
        assert rt.opening_balance == Decimal("1234.56")
        assert rt.statement_period_start == date(2026, 3, 1)
        # Whole-model comparison so a regression in any of the nine fields
        # is caught, not only in the three spot-checked above.
        assert rt == hdr

    def test_account_type_literal_rejects_unknown(self) -> None:
        """A value outside the ``account_type`` literal set fails validation."""
        import pytest
        from pydantic import ValidationError

        with pytest.raises(ValidationError):
            BankStatementHeader(bank_name="UBS", currency="CHF", account_type="weird")  # type: ignore[arg-type]

View file

@ -0,0 +1,35 @@
"""Use-case registry: lookup, unknown-name error, first use case wired."""
from __future__ import annotations
import pytest
from pydantic import BaseModel
from ix.errors import IXErrorCode, IXException
from ix.use_cases import REGISTRY, get_use_case
from ix.use_cases.bank_statement_header import BankStatementHeader, Request
def test_registry_has_bank_statement_header() -> None:
    """The registry maps ``bank_statement_header`` to its request/response pair."""
    expected_pair = (Request, BankStatementHeader)
    assert REGISTRY["bank_statement_header"] == expected_pair
def test_registry_entry_types_are_basemodel_subclasses() -> None:
    """Both classes in the registered pair derive from ``BaseModel``."""
    request_model, response_model = REGISTRY["bank_statement_header"]
    assert issubclass(request_model, BaseModel)
    assert issubclass(response_model, BaseModel)
def test_get_use_case_returns_tuple() -> None:
    """``get_use_case`` hands back the exact classes stored in the registry."""
    request_model, response_model = get_use_case("bank_statement_header")
    assert request_model is Request
    assert response_model is BankStatementHeader
def test_get_use_case_unknown_name_raises_ix_001_001() -> None:
    """An unregistered name raises ``IX_001_001`` and reports the name in ``detail``."""
    with pytest.raises(IXException) as caught:
        get_use_case("no_such_use_case")

    error = caught.value
    assert error.code is IXErrorCode.IX_001_001
    # The bad name must appear in the detail so log lines are unambiguous.
    detail_text = error.detail or ""
    assert "no_such_use_case" in detail_text