Compare commits
No commits in common. "663cb4ae104b3c90135a59766f04dddea3213e5d" and "86538ee8de7497115e7a93ac7e16bd8254226720" have entirely different histories.
663cb4ae10
...
86538ee8de
12 changed files with 0 additions and 2147 deletions
41
.env.example
41
.env.example
|
|
@ -1,41 +0,0 @@
|
||||||
# InfoXtractor runtime configuration.
|
|
||||||
# Every variable is optional; defaults are suitable for running against the
|
|
||||||
# on-prem stack (Ollama on :11434, postgis on :5431). Copy to .env and fill in
|
|
||||||
# the Postgres password.
|
|
||||||
|
|
||||||
# --- Job store -----------------------------------------------------------
|
|
||||||
IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@host.docker.internal:5431/infoxtractor
|
|
||||||
|
|
||||||
# --- LLM backend ---------------------------------------------------------
|
|
||||||
IX_OLLAMA_URL=http://host.docker.internal:11434
|
|
||||||
IX_DEFAULT_MODEL=gpt-oss:20b
|
|
||||||
|
|
||||||
# --- OCR -----------------------------------------------------------------
|
|
||||||
IX_OCR_ENGINE=surya
|
|
||||||
|
|
||||||
# --- Pipeline behavior ---------------------------------------------------
|
|
||||||
IX_PIPELINE_WORKER_CONCURRENCY=1
|
|
||||||
IX_PIPELINE_REQUEST_TIMEOUT_SECONDS=2700
|
|
||||||
IX_GENAI_CALL_TIMEOUT_SECONDS=1500
|
|
||||||
IX_RENDER_MAX_PIXELS_PER_PAGE=75000000
|
|
||||||
|
|
||||||
# --- File fetching -------------------------------------------------------
|
|
||||||
IX_TMP_DIR=/tmp/ix
|
|
||||||
IX_FILE_MAX_BYTES=52428800
|
|
||||||
IX_FILE_CONNECT_TIMEOUT_SECONDS=10
|
|
||||||
IX_FILE_READ_TIMEOUT_SECONDS=30
|
|
||||||
|
|
||||||
# --- Transport / callbacks ----------------------------------------------
|
|
||||||
IX_CALLBACK_TIMEOUT_SECONDS=10
|
|
||||||
|
|
||||||
# --- Observability -------------------------------------------------------
|
|
||||||
IX_LOG_LEVEL=INFO
|
|
||||||
|
|
||||||
# --- Test-only -----------------------------------------------------------
|
|
||||||
# Set IX_TEST_MODE=fake to have the pipeline factory build FakeOCRClient /
|
|
||||||
# FakeGenAIClient instead of real ones (used by integration tests).
|
|
||||||
# IX_TEST_MODE=fake
|
|
||||||
|
|
||||||
# Set IX_TEST_OLLAMA=1 on the Mac to enable tests that require a real
|
|
||||||
# Ollama / Surya instance (tests/live/).
|
|
||||||
# IX_TEST_OLLAMA=1
|
|
||||||
|
|
@ -1,37 +0,0 @@
|
||||||
name: tests
|
|
||||||
|
|
||||||
on:
|
|
||||||
push:
|
|
||||||
pull_request:
|
|
||||||
|
|
||||||
jobs:
|
|
||||||
test:
|
|
||||||
runs-on: docker
|
|
||||||
services:
|
|
||||||
postgres:
|
|
||||||
image: postgres:16
|
|
||||||
env:
|
|
||||||
POSTGRES_USER: test
|
|
||||||
POSTGRES_PASSWORD: test
|
|
||||||
POSTGRES_DB: ix_test
|
|
||||||
|
|
||||||
env:
|
|
||||||
IX_POSTGRES_URL: postgresql+asyncpg://test:test@postgres:5432/ix_test
|
|
||||||
IX_TEST_MODE: fake
|
|
||||||
|
|
||||||
steps:
|
|
||||||
- uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Install system deps for python-magic / PyMuPDF
|
|
||||||
run: |
|
|
||||||
apt-get update -qq
|
|
||||||
apt-get install -y -qq --no-install-recommends libmagic1 libgl1 libglib2.0-0
|
|
||||||
|
|
||||||
- name: Install uv
|
|
||||||
run: curl -LsSf https://astral.sh/uv/install.sh | sh
|
|
||||||
|
|
||||||
- name: Lint
|
|
||||||
run: ~/.local/bin/uv run --extra dev ruff check src tests
|
|
||||||
|
|
||||||
- name: Unit + integration tests
|
|
||||||
run: ~/.local/bin/uv run --extra dev pytest tests/unit tests/integration -v
|
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -15,6 +15,3 @@ dist/
|
||||||
build/
|
build/
|
||||||
*.log
|
*.log
|
||||||
/tmp/
|
/tmp/
|
||||||
# uv
|
|
||||||
# uv.lock is committed intentionally for reproducible builds.
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
3.12
|
|
||||||
|
|
@ -1,73 +0,0 @@
|
||||||
[project]
|
|
||||||
name = "infoxtractor"
|
|
||||||
version = "0.1.0"
|
|
||||||
description = "Async on-prem LLM-powered structured information extraction microservice"
|
|
||||||
readme = "README.md"
|
|
||||||
requires-python = ">=3.12"
|
|
||||||
license = { text = "MIT" }
|
|
||||||
authors = [{ name = "goldstein" }]
|
|
||||||
|
|
||||||
dependencies = [
|
|
||||||
# Web / async
|
|
||||||
"fastapi>=0.115",
|
|
||||||
"uvicorn[standard]>=0.32",
|
|
||||||
"httpx>=0.27",
|
|
||||||
|
|
||||||
# Data
|
|
||||||
"pydantic>=2.9",
|
|
||||||
"pydantic-settings>=2.6",
|
|
||||||
|
|
||||||
# Database
|
|
||||||
"sqlalchemy[asyncio]>=2.0.36",
|
|
||||||
"asyncpg>=0.30",
|
|
||||||
"alembic>=1.14",
|
|
||||||
|
|
||||||
# Document processing
|
|
||||||
"pymupdf>=1.25",
|
|
||||||
"pillow>=10.2,<11.0",
|
|
||||||
"python-magic>=0.4.27",
|
|
||||||
"python-dateutil>=2.9",
|
|
||||||
]
|
|
||||||
|
|
||||||
[project.optional-dependencies]
|
|
||||||
ocr = [
|
|
||||||
# Real OCR engine — pulls torch + CUDA wheels. Kept optional so CI
|
|
||||||
# (no GPU) can install the base package without the model deps.
|
|
||||||
"surya-ocr>=0.9",
|
|
||||||
"torch>=2.4",
|
|
||||||
]
|
|
||||||
dev = [
|
|
||||||
"pytest>=8.3",
|
|
||||||
"pytest-asyncio>=0.24",
|
|
||||||
"pytest-httpx>=0.32",
|
|
||||||
"ruff>=0.8",
|
|
||||||
"mypy>=1.13",
|
|
||||||
]
|
|
||||||
|
|
||||||
[build-system]
|
|
||||||
requires = ["hatchling"]
|
|
||||||
build-backend = "hatchling.build"
|
|
||||||
|
|
||||||
[tool.hatch.build.targets.wheel]
|
|
||||||
packages = ["src/ix"]
|
|
||||||
|
|
||||||
[tool.pytest.ini_options]
|
|
||||||
asyncio_mode = "auto"
|
|
||||||
testpaths = ["tests"]
|
|
||||||
addopts = "-ra --strict-markers"
|
|
||||||
markers = [
|
|
||||||
"live: requires live Ollama/Surya (IX_TEST_OLLAMA=1 to enable)",
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.ruff]
|
|
||||||
line-length = 100
|
|
||||||
target-version = "py312"
|
|
||||||
|
|
||||||
[tool.ruff.lint]
|
|
||||||
select = ["E", "F", "W", "I", "UP", "B", "SIM", "RUF"]
|
|
||||||
ignore = ["E501"] # line length handled by formatter
|
|
||||||
|
|
||||||
[tool.mypy]
|
|
||||||
python_version = "3.12"
|
|
||||||
strict = true
|
|
||||||
plugins = ["pydantic.mypy"]
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
||||||
"""InfoXtractor — async on-prem structured information extraction service."""
|
|
||||||
|
|
||||||
__version__ = "0.1.0"
|
|
||||||
|
|
@ -1,15 +0,0 @@
|
||||||
"""Shared pytest fixtures.
|
|
||||||
|
|
||||||
Unit-test fixtures live here. Integration-test fixtures (real Postgres, etc.)
|
|
||||||
will be added in Chunk 3 once the store module lands.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def anyio_backend() -> str:
|
|
||||||
"""Force asyncio for anyio-based tests (pytest-asyncio uses this)."""
|
|
||||||
return "asyncio"
|
|
||||||
|
|
@ -1,13 +0,0 @@
|
||||||
"""Smoke test: confirms the package is importable and pyproject is well-formed.
|
|
||||||
|
|
||||||
Acts as the minimum-viable test that the project is set up correctly. It will
|
|
||||||
be replaced with real tests as the chunks land.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
|
|
||||||
def test_package_importable() -> None:
|
|
||||||
import ix
|
|
||||||
|
|
||||||
assert ix.__version__ == "0.1.0"
|
|
||||||
Loading…
Reference in a new issue