"""Application configuration — loaded from ``IX_*`` env vars via pydantic-settings. Spec §9 lists every tunable. This module is the single read-point for them; callers that need runtime config should go through :func:`get_config` rather than ``os.environ``. The LRU cache makes the first call materialise + validate the full config and every subsequent call return the same instance. Cache-clearing is public (``get_config.cache_clear()``) because tests need to re-read after ``monkeypatch.setenv``. Production code never clears the cache. """ from __future__ import annotations from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict class AppConfig(BaseSettings): """Typed view over the ``IX_*`` environment. Field names drop the ``IX_`` prefix — pydantic-settings puts it back via ``env_prefix``. Defaults match the spec exactly; do not change a default here without updating spec §9 in the same commit. """ model_config = SettingsConfigDict( env_prefix="IX_", env_file=".env", env_file_encoding="utf-8", extra="ignore", ) # --- Job store --- postgres_url: str = ( "postgresql+asyncpg://infoxtractor:" "@host.docker.internal:5431/infoxtractor" ) # --- LLM backend --- ollama_url: str = "http://host.docker.internal:11434" default_model: str = "gpt-oss:20b" # --- OCR --- ocr_engine: str = "surya" # --- Pipeline behavior --- pipeline_worker_concurrency: int = 1 pipeline_request_timeout_seconds: int = 2700 genai_call_timeout_seconds: int = 1500 render_max_pixels_per_page: int = 75_000_000 # --- File fetching --- tmp_dir: str = "/tmp/ix" file_max_bytes: int = 52_428_800 file_connect_timeout_seconds: int = 10 file_read_timeout_seconds: int = 30 # --- Transport / callbacks --- callback_timeout_seconds: int = 10 # --- Observability --- log_level: str = "INFO" @lru_cache(maxsize=1) def get_config() -> AppConfig: """Return the process-wide :class:`AppConfig` (materialise on first call). Wrapped in ``lru_cache`` so config is parsed + validated once per process. Tests call ``get_config.cache_clear()`` between scenarios; nothing in production should touch the cache. """ return AppConfig()