infoxtractor/src/ix/errors.py
Dirk Riemann ae595c937a
All checks were successful
tests / test (push) Successful in 1m2s
tests / test (pull_request) Successful in 59s
feat(errors): add IXException + IXErrorCode per spec §8
Adds the single exception type used throughout the pipeline. Every failure
maps to one of the ten IX_* codes from the MVP spec §8 with a stable
machine-readable code and an optional free-form detail. The `str()` form is
log-scrapable with a single regex (`IX_xxx_xxx: <msg> (detail=...)`), so
mammon-side reliability UX can classify failures without brittle string
parsing.

Enum values equal names so callers can serialise either.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 10:46:01 +02:00

90 lines
3.1 KiB
Python

"""Error codes + exception type for the infoxtractor pipeline.
Every pipeline-emitted failure maps to one of the ``IX_*`` codes defined in
the MVP spec §8. Callers receive the code (stable, machine-readable) plus a
free-form ``detail`` field (human-readable, may contain the offending URL,
model name, schema snippet, etc.). The ``str()`` form embeds both so the
error is log-scrapable with a single regex.
Example::

    raise IXException(IXErrorCode.IX_000_007, detail="https://x/y.pdf: 404")
    # -> "IX_000_007: File fetch failed (detail=https://x/y.pdf: 404)"
"""
from __future__ import annotations
from enum import Enum
class IXErrorCode(Enum):
    """Closed set of machine-readable failure codes for the pipeline.

    Each member's value is the same string as its name, so a code can be
    serialised through ``.name`` or ``.value`` with an identical result.

    The MVP ships exactly the codes listed in spec §8. Do not add members
    ad hoc: every new trigger has to be specified first so that mammon-side
    reliability UX stays predictable.
    """

    IX_000_000 = "IX_000_000"
    IX_000_002 = "IX_000_002"
    IX_000_004 = "IX_000_004"
    IX_000_005 = "IX_000_005"
    IX_000_006 = "IX_000_006"
    IX_000_007 = "IX_000_007"
    IX_001_000 = "IX_001_000"
    IX_001_001 = "IX_001_001"
    IX_002_000 = "IX_002_000"
    IX_002_001 = "IX_002_001"

    @property
    def default_message(self) -> str:
        """Return the default human-readable text for this code (spec §8).

        The text is looked up in the module-level ``_DEFAULT_MESSAGES``
        table, keyed by the enum member itself.
        """
        message = _DEFAULT_MESSAGES[self]
        return message
# Default human-readable wording for every ``IXErrorCode`` member; this is the
# table that ``IXErrorCode.default_message`` reads from.
_DEFAULT_MESSAGES: dict[IXErrorCode, str] = {
    # IX_000_*
    IXErrorCode.IX_000_000: "request_ix is None",
    IXErrorCode.IX_000_002: "No context provided (neither files nor texts)",
    IXErrorCode.IX_000_004: (
        "OCR artifacts requested "
        "(include_geometries / include_ocr_text / ocr_only) "
        "but context.files is empty"
    ),
    IXErrorCode.IX_000_005: "File MIME type not supported",
    IXErrorCode.IX_000_006: "PDF page-count cap exceeded",
    IXErrorCode.IX_000_007: "File fetch failed",
    # IX_001_*
    IXErrorCode.IX_001_000: "Extraction context empty after setup",
    IXErrorCode.IX_001_001: "Use case name not found in registry",
    # IX_002_*
    IXErrorCode.IX_002_000: "Inference backend unavailable",
    IXErrorCode.IX_002_001: "Structured output parse failed",
}
class IXException(Exception):
    """Single exception type carrying an :class:`IXErrorCode` + optional detail.

    Raised by any pipeline step, adapter, or client when a recoverable-by-the-
    caller failure happens. The pipeline orchestrator catches this, writes the
    code into ``ResponseIX.error``, and lets the job terminate in ``status=error``.

    ``str(exc)`` has the shape ``<code>: <default message>`` with
    `` (detail=<detail>)`` appended when a detail was supplied, and is rebuilt
    from the current ``code`` / ``detail`` attributes on every call.

    Instances survive ``pickle`` and ``copy`` round-trips, so they can be
    handed across process boundaries.

    Parameters
    ----------
    code:
        One of the :class:`IXErrorCode` values.
    detail:
        Optional free-form string (URL, model name, snippet, etc.). Embedded
        into ``str(exc)`` as ``(detail=...)`` when present.
    """

    def __init__(self, code: IXErrorCode, detail: str | None = None) -> None:
        # Attributes must exist before _format() runs inside super().__init__.
        self.code = code
        self.detail = detail
        # ``args`` deliberately keeps the formatted message (not code/detail)
        # so generic handlers that read ``exc.args[0]`` see the full text.
        super().__init__(self._format())

    def _format(self) -> str:
        """Build the log-scrapable message from ``code`` and ``detail``."""
        base = f"{self.code.value}: {self.code.default_message}"
        # Only ``None`` suppresses the suffix; an empty string is still shown.
        if self.detail is None:
            return base
        return f"{base} (detail={self.detail})"

    def __str__(self) -> str:
        """Return the message formatted from the current attribute values."""
        return self._format()

    def __reduce__(self) -> tuple[object, ...]:
        """Reconstruct from ``(code, detail)`` when pickled or copied.

        The default exception reduction replays ``self.args`` into
        ``__init__``. Here ``args`` holds the already formatted message, so
        that string would be bound to ``code`` and ``_format()`` would fail
        with ``AttributeError`` while unpickling. Passing the real
        constructor arguments avoids that; the instance ``__dict__`` is sent
        along as state so extra attributes (e.g. ``__notes__``) are kept.
        """
        return (type(self), (self.code, self.detail), dict(vars(self)))