# infoxtractor/src/ix/use_cases/bank_statement_header.py

"""`bank_statement_header` — first (and, for MVP, only) use case.

Shape mirrors spec §7. The module defines the pair of Pydantic models
(``Request`` = prompt/model config, ``BankStatementHeader`` = extraction
schema) without registering itself — registration happens in
:mod:`ix.use_cases` so import-time side effects stay out.

All header fields are ``Optional`` except ``bank_name`` and ``currency``;
the spec lets every other field be null when the document doesn't show it.
The flat (no-nested-list) schema is chosen because Ollama's structured
output stays most reliable when the top level contains only scalars.
"""

from __future__ import annotations

from datetime import date
from decimal import Decimal
from typing import Literal

from pydantic import BaseModel, ConfigDict


class Request(BaseModel):
    """Prompt + default-model config for this use case."""

    # Reject unknown keys instead of silently accepting them.
    model_config = ConfigDict(extra="forbid")

    # Human-readable label for the use case.
    use_case_name: str = "Bank Statement Header"

    # Model identifier used when the caller does not override it.
    default_model: str = "gpt-oss:20b"

    # The prompt is assembled from adjacent string literals, split on
    # sentence boundaries; every fragment but the last ends with a single
    # space so the concatenated text reads as one paragraph.
    system_prompt: str = (
        "You extract header metadata from a single bank or credit-card statement. "
        "Return only facts that appear in the document; leave a field null if uncertain. "
        "Balances must use the document's numeric format "
        "(e.g. '1234.56' or '-123.45'); do not invent a currency symbol. "
        "Account type: 'checking' for current/Giro accounts, "
        "'credit' for credit-card statements, 'savings' otherwise. "
        "Always return the IBAN with spaces removed. "
        "Never fabricate a value to fill a required-looking field."
    )


class BankStatementHeader(BaseModel):
    """Extraction schema for the bank-statement header fields."""

    # Layout notes:
    # - Only ``bank_name`` and ``currency`` are required (they have no
    #   default); every other field defaults to ``None``.
    # - The schema is deliberately flat: every field is a scalar, with no
    #   nested models or lists.
    # - Unknown keys in the input are rejected (``extra="forbid"``).
    model_config = ConfigDict(extra="forbid")

    # Required: name of the issuing bank.
    bank_name: str
    # No format validation is applied here; the system prompt in ``Request``
    # asks for the IBAN with spaces removed, but the schema does not enforce it.
    account_iban: str | None = None
    # Restricted to the three labels the system prompt in ``Request`` defines.
    account_type: Literal["checking", "credit", "savings"] | None = None
    # Required: free-form string, no format enforced by this schema
    # (presumably an ISO 4217 code such as "EUR" — TODO confirm against spec).
    currency: str
    # Dates are typed as ``date`` (no time component).
    statement_date: date | None = None
    statement_period_start: date | None = None
    statement_period_end: date | None = None
    # Balances are typed as ``Decimal`` rather than ``float``.
    opening_balance: Decimal | None = None
    closing_balance: Decimal | None = None