Typed pydantic-settings view over every IX_* env var, defaults matching spec §9 exactly. @lru_cache-wrapped accessor so parsing/validation happens once per process; tests clear the cache via get_config.cache_clear(). extra="ignore" keeps the container robust against typo'd env vars in production .env files. engine.py's URL resolver now goes through get_config() when ix.config is importable (bootstrap fallback remains so hypothetical early-import callers don't crash). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
75 lines
2.3 KiB
Python
75 lines
2.3 KiB
Python
"""Application configuration — loaded from ``IX_*`` env vars via pydantic-settings.
|
|
|
|
Spec §9 lists every tunable. This module is the single read-point for them;
callers that need runtime config should go through :func:`get_config` rather
than ``os.environ``. The LRU cache makes the first call materialise + validate
the full config and every subsequent call return the same instance.

Cache-clearing is public (``get_config.cache_clear()``) because tests need to
re-read after ``monkeypatch.setenv``. Production code never clears the cache.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from functools import lru_cache
|
|
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
|
|
|
|
class AppConfig(BaseSettings):
    """Settings model for the service, populated from ``IX_*`` variables.

    Each attribute is declared without the ``IX_`` prefix; the ``env_prefix``
    entry in ``model_config`` tells pydantic-settings to look the value up
    under the prefixed name (``postgres_url`` is read from
    ``IX_POSTGRES_URL``, and so on). A ``.env`` file in the working directory
    is consulted as well, decoded as UTF-8.

    The defaults below are the values documented in spec §9. Keep the two in
    lock-step: a default must not change here unless spec §9 changes in the
    same commit.
    """

    # extra="ignore": unknown keys (e.g. a misspelt variable in a production
    # .env file) are dropped silently instead of failing validation.
    model_config = SettingsConfigDict(
        env_prefix="IX_",
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # --- Job store ---
    # SQLAlchemy-style URL using the asyncpg driver. The literal is split
    # across lines only for readability; the pieces concatenate to one string.
    postgres_url: str = (
        "postgresql+asyncpg://"
        "infoxtractor:<password>@host.docker.internal:5431"
        "/infoxtractor"
    )

    # --- LLM backend ---
    # Base URL of the Ollama server and the model tag used by default.
    ollama_url: str = "http://host.docker.internal:11434"
    default_model: str = "gpt-oss:20b"

    # --- OCR ---
    # Name of the OCR engine to use.
    ocr_engine: str = "surya"

    # --- Pipeline behavior ---
    pipeline_worker_concurrency: int = 1
    pipeline_request_timeout_seconds: int = 45 * 60  # 2700 s
    genai_call_timeout_seconds: int = 25 * 60  # 1500 s
    render_max_pixels_per_page: int = 75_000_000

    # --- File fetching ---
    tmp_dir: str = "/tmp/ix"
    file_max_bytes: int = 50 * 1024 * 1024  # 52_428_800 bytes (50 MiB)
    file_connect_timeout_seconds: int = 10
    file_read_timeout_seconds: int = 30

    # --- Transport / callbacks ---
    callback_timeout_seconds: int = 10

    # --- Observability ---
    log_level: str = "INFO"
|
|
|
|
|
|
@lru_cache(maxsize=1)
def get_config() -> AppConfig:
    """Build the shared :class:`AppConfig` once and hand back that instance.

    The ``lru_cache`` decorator memoises the single (argument-less) call, so
    environment parsing and validation run on the first invocation only;
    later invocations receive the very same object.

    ``get_config.cache_clear()`` exists for the test-suite, which needs a
    fresh read after changing the environment between scenarios. Production
    code is expected to leave the cache alone.

    Returns:
        The cached :class:`AppConfig` instance.
    """
    config = AppConfig()
    return config
|