feat(scaffold): project skeleton with uv + pytest + forgejo CI (#1)
Some checks are pending
tests / test (push) Waiting to run

Lands Task 1.1 from the MVP plan: empty-project skeleton so later tasks have somewhere to land. Local tests + ruff pass. CI trigger fix included so feat branches get runs going forward.
This commit is contained in:
goldstein 2026-04-18 08:42:56 +00:00
commit 663cb4ae10
12 changed files with 2147 additions and 0 deletions

41
.env.example Normal file
View file

@ -0,0 +1,41 @@
# InfoXtractor runtime configuration.
# Every variable is optional; the defaults suit the on-prem stack (Ollama on
# :11434, postgis on :5431). Copy this file to .env and replace <password> in
# IX_POSTGRES_URL with the Postgres password.
# --- Job store -----------------------------------------------------------
IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@host.docker.internal:5431/infoxtractor
# --- LLM backend ---------------------------------------------------------
IX_OLLAMA_URL=http://host.docker.internal:11434
IX_DEFAULT_MODEL=gpt-oss:20b
# --- OCR -----------------------------------------------------------------
IX_OCR_ENGINE=surya
# --- Pipeline behavior ---------------------------------------------------
IX_PIPELINE_WORKER_CONCURRENCY=1
IX_PIPELINE_REQUEST_TIMEOUT_SECONDS=2700
IX_GENAI_CALL_TIMEOUT_SECONDS=1500
IX_RENDER_MAX_PIXELS_PER_PAGE=75000000
# --- File fetching -------------------------------------------------------
IX_TMP_DIR=/tmp/ix
IX_FILE_MAX_BYTES=52428800
IX_FILE_CONNECT_TIMEOUT_SECONDS=10
IX_FILE_READ_TIMEOUT_SECONDS=30
# --- Transport / callbacks ----------------------------------------------
IX_CALLBACK_TIMEOUT_SECONDS=10
# --- Observability -------------------------------------------------------
IX_LOG_LEVEL=INFO
# --- Test-only -----------------------------------------------------------
# Set IX_TEST_MODE=fake to have the pipeline factory build FakeOCRClient /
# FakeGenAIClient instead of real ones (used by integration tests).
# IX_TEST_MODE=fake
# Set IX_TEST_OLLAMA=1 on the Mac to enable tests that require a real
# Ollama / Surya instance (tests/live/).
# IX_TEST_OLLAMA=1

37
.forgejo/workflows/ci.yml Normal file
View file

@ -0,0 +1,37 @@
name: tests
on:
push:
pull_request:
jobs:
test:
runs-on: docker
services:
postgres:
image: postgres:16
env:
POSTGRES_USER: test
POSTGRES_PASSWORD: test
POSTGRES_DB: ix_test
env:
IX_POSTGRES_URL: postgresql+asyncpg://test:test@postgres:5432/ix_test
IX_TEST_MODE: fake
steps:
- uses: actions/checkout@v4
- name: Install system deps for python-magic / PyMuPDF
run: |
apt-get update -qq
apt-get install -y -qq --no-install-recommends libmagic1 libgl1 libglib2.0-0
- name: Install uv
run: curl -LsSf https://astral.sh/uv/install.sh | sh
- name: Lint
run: ~/.local/bin/uv run --extra dev ruff check src tests
- name: Unit + integration tests
run: ~/.local/bin/uv run --extra dev pytest tests/unit tests/integration -v

3
.gitignore vendored
View file

@ -15,3 +15,6 @@ dist/
build/
*.log
/tmp/
# uv
# uv.lock is committed intentionally for reproducible builds.

1
.python-version Normal file
View file

@ -0,0 +1 @@
3.12

73
pyproject.toml Normal file
View file

@ -0,0 +1,73 @@
[project]
name = "infoxtractor"
version = "0.1.0"
description = "Async on-prem LLM-powered structured information extraction microservice"
readme = "README.md"
requires-python = ">=3.12"
license = { text = "MIT" }
authors = [{ name = "goldstein" }]
dependencies = [
# Web / async
"fastapi>=0.115",
"uvicorn[standard]>=0.32",
"httpx>=0.27",
# Data
"pydantic>=2.9",
"pydantic-settings>=2.6",
# Database
"sqlalchemy[asyncio]>=2.0.36",
"asyncpg>=0.30",
"alembic>=1.14",
# Document processing
"pymupdf>=1.25",
"pillow>=10.2,<11.0",
"python-magic>=0.4.27",
"python-dateutil>=2.9",
]
[project.optional-dependencies]
ocr = [
# Real OCR engine — pulls torch + CUDA wheels. Kept optional so CI
# (no GPU) can install the base package without the model deps.
"surya-ocr>=0.9",
"torch>=2.4",
]
dev = [
"pytest>=8.3",
"pytest-asyncio>=0.24",
"pytest-httpx>=0.32",
"ruff>=0.8",
"mypy>=1.13",
]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.hatch.build.targets.wheel]
packages = ["src/ix"]
[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
addopts = "-ra --strict-markers"
markers = [
"live: requires live Ollama/Surya (IX_TEST_OLLAMA=1 to enable)",
]
[tool.ruff]
line-length = 100
target-version = "py312"
[tool.ruff.lint]
select = ["E", "F", "W", "I", "UP", "B", "SIM", "RUF"]
ignore = ["E501"] # line length handled by formatter
[tool.mypy]
python_version = "3.12"
strict = true
plugins = ["pydantic.mypy"]

3
src/ix/__init__.py Normal file
View file

@ -0,0 +1,3 @@
"""InfoXtractor — async on-prem structured information extraction service."""
__version__ = "0.1.0"

0
tests/__init__.py Normal file
View file

15
tests/conftest.py Normal file
View file

@ -0,0 +1,15 @@
"""Shared pytest fixtures.
Unit-test fixtures live here. Integration-test fixtures (real Postgres, etc.)
will be added in Chunk 3 once the store module lands.
"""
from __future__ import annotations
import pytest
@pytest.fixture
def anyio_backend() -> str:
    """Name the async backend that anyio-based tests should run on.

    Returns:
        The literal backend name ``"asyncio"``.

    NOTE(review): the previous docstring said pytest-asyncio uses this
    fixture. ``anyio_backend`` is the fixture name looked up by the anyio
    pytest plugin -- confirm which plugin this is meant to configure.
    """
    backend_name: str = "asyncio"
    return backend_name

View file

0
tests/unit/__init__.py Normal file
View file

View file

@ -0,0 +1,13 @@
"""Smoke test: confirms the package is importable and pyproject is well-formed.
It acts as the minimum viable check that the project is set up correctly and
will be replaced with real tests as the chunks land.
"""
from __future__ import annotations
def test_package_importable() -> None:
    """Check that ``ix`` imports and reports the expected version string."""
    # Imported inside the test so an import failure is reported as this
    # test failing rather than as a collection error for the whole module.
    import ix

    expected_version = "0.1.0"
    actual_version = ix.__version__
    assert actual_version == expected_version

1961
uv.lock Normal file

File diff suppressed because it is too large Load diff