First deploy done 2026-04-18. E2E extraction of the bank_statement_header use case completes in 35 s against the live service, with 7 of 9 header fields provenance-verified and text-agreement-green. The closing_balance assertions from spec §12 all pass.

Updates:
- README.md: status -> "MVP deployed"; worked example curl snippet; pointers to deployment runbook + spec + plan.
- AGENTS.md: status line updated with the live URL + date.
- pyproject.toml: version comment referencing the first deploy.
- docs/deployment.md: "First deploy" section filled in with times, field-level extraction result, plus a log of every small Docker/ops follow-up PR that had to land to make the first deploy healthy.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
82 lines
2 KiB
TOML
82 lines
2 KiB
TOML
# Core package metadata (PEP 621).
[project]
name = "infoxtractor"
version = "0.1.0"
# Released 2026-04-18 with the first live deploy of the MVP. See
# docs/deployment.md §"First deploy" for the commit + /healthz times.
description = "Async on-prem LLM-powered structured information extraction microservice"
readme = "README.md"
requires-python = ">=3.12"
license = { text = "MIT" }
authors = [{ name = "goldstein" }]

# Runtime requirements installed with the base package. Entries are grouped
# by concern rather than sorted alphabetically; keep new entries in the
# matching group.
dependencies = [
    # Web / async
    "fastapi>=0.115",
    "uvicorn[standard]>=0.32",
    "httpx>=0.27",

    # Data
    "pydantic>=2.9",
    "pydantic-settings>=2.6",

    # Database
    "sqlalchemy[asyncio]>=2.0.36",
    "asyncpg>=0.30",
    "alembic>=1.14",

    # Document processing
    "pymupdf>=1.25",
    # pillow is the only base dependency with an upper bound.
    # NOTE(review): the reason for the <11.0 cap is not recorded here —
    # confirm before lifting it.
    "pillow>=10.2,<11.0",
    "python-magic>=0.4.27",
    "python-dateutil>=2.9",
]

# Extras: `ocr` pulls in the real OCR stack, `dev` the test/lint toolchain.
[project.optional-dependencies]
ocr = [
    # Real OCR engine. Kept optional so CI (no GPU) can install the base
    # package without the model deps.
    # surya >= 0.17 is required: the client code uses the
    # `surya.foundation` module, which older releases don't expose.
    "surya-ocr>=0.17,<0.18",
    # Note: the default pypi torch ships cu13 wheels, which emit a
    # UserWarning and fall back to CPU against the deploy host's CUDA 12.4
    # driver. Surya then runs on CPU — slower but correct for MVP. A future
    # driver upgrade unlocks GPU Surya with no code changes.
    "torch>=2.7",
]
dev = [
    "pytest>=8.3",
    "pytest-asyncio>=0.24",
    "pytest-httpx>=0.32",
    "ruff>=0.8",
    "mypy>=1.13",
]

# PEP 517 build configuration: wheels and sdists are built with hatchling.
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

# The importable package directory is src/ix, which differs from the
# project name ("infoxtractor"), so the wheel target lists it explicitly.
[tool.hatch.build.targets.wheel]
packages = ["src/ix"]

[tool.pytest.ini_options]
# pytest-asyncio "auto" mode: async test functions are collected as asyncio
# tests without needing an explicit marker on each one.
asyncio_mode = "auto"
testpaths = ["tests"]
# -ra: print a short summary for every non-passing outcome.
# --strict-markers: using a marker not registered below is an error.
addopts = "-ra --strict-markers"
markers = [
    "live: requires live Ollama/Surya (IX_TEST_OLLAMA=1 to enable)",
]

[tool.ruff]
line-length = 100
# Matches the project's `requires-python = ">=3.12"` floor.
target-version = "py312"

[tool.ruff.lint]
# Enabled rule families:
#   E / W  pycodestyle errors / warnings
#   F      Pyflakes
#   I      isort (import ordering)
#   UP     pyupgrade
#   B      flake8-bugbear
#   SIM    flake8-simplify
#   RUF    Ruff-specific rules
select = ["E", "F", "W", "I", "UP", "B", "SIM", "RUF"]
# E501 (line too long): line length handled by formatter.
ignore = ["E501"]

[tool.mypy]
# Type-check against the same Python version the project requires.
python_version = "3.12"
strict = true
# Loads the mypy plugin shipped with pydantic (a base dependency).
plugins = ["pydantic.mypy"]