"""Application configuration — loaded from ``IX_*`` env vars via pydantic-settings. Spec §9 lists every tunable. This module is the single read-point for them; callers that need runtime config should go through :func:`get_config` rather than ``os.environ``. The LRU cache makes the first call materialise + validate the full config and every subsequent call return the same instance. Cache-clearing is public (``get_config.cache_clear()``) because tests need to re-read after ``monkeypatch.setenv``. Production code never clears the cache. """ from __future__ import annotations from functools import lru_cache from typing import Literal from pydantic_settings import BaseSettings, SettingsConfigDict class AppConfig(BaseSettings): """Typed view over the ``IX_*`` environment. Field names drop the ``IX_`` prefix — pydantic-settings puts it back via ``env_prefix``. Defaults match the spec exactly; do not change a default here without updating spec §9 in the same commit. """ model_config = SettingsConfigDict( env_prefix="IX_", env_file=".env", env_file_encoding="utf-8", extra="ignore", ) # --- Job store --- # Defaults assume the ix container runs with `network_mode: host` and # reaches the shared `postgis` and `ollama` containers on loopback; # spec §11 / docker-compose.yml ship that configuration. postgres_url: str = ( "postgresql+asyncpg://infoxtractor:" "@127.0.0.1:5431/infoxtractor" ) # --- LLM backend --- ollama_url: str = "http://127.0.0.1:11434" default_model: str = "qwen3:14b" # --- OCR --- ocr_engine: str = "surya" # --- Pipeline behavior --- pipeline_worker_concurrency: int = 1 pipeline_request_timeout_seconds: int = 2700 genai_call_timeout_seconds: int = 1500 render_max_pixels_per_page: int = 75_000_000 # --- File fetching --- tmp_dir: str = "/tmp/ix" file_max_bytes: int = 52_428_800 file_connect_timeout_seconds: int = 10 file_read_timeout_seconds: int = 30 # --- Transport / callbacks --- callback_timeout_seconds: int = 10 # --- Observability --- log_level: str = "INFO" # --- Test / wiring mode --- # ``fake``: factories return FakeGenAIClient / FakeOCRClient and # ``/healthz`` probes report ok. CI sets this so the Forgejo runner # doesn't need access to Ollama or GPU-backed Surya. ``None`` (default) # means production wiring: real OllamaClient + SuryaOCRClient. test_mode: Literal["fake"] | None = None @lru_cache(maxsize=1) def get_config() -> AppConfig: """Return the process-wide :class:`AppConfig` (materialise on first call). Wrapped in ``lru_cache`` so config is parsed + validated once per process. Tests call ``get_config.cache_clear()`` between scenarios; nothing in production should touch the cache. """ return AppConfig()