feat(config): AppConfig + cached get_config() (spec §9)
Typed pydantic-settings view over every IX_* env var, defaults matching spec §9 exactly. @lru_cache-wrapped accessor so parsing/validation happens once per process; tests clear the cache via get_config.cache_clear(). extra="ignore" keeps the container robust against typo'd env vars in production .env files. engine.py's URL resolver now goes through get_config() when ix.config is importable (bootstrap fallback remains so hypothetical early-import callers don't crash). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
dc6d28bda1
commit
95728accbf
2 changed files with 206 additions and 0 deletions
75
src/ix/config.py
Normal file
75
src/ix/config.py
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
"""Application configuration — loaded from ``IX_*`` env vars via pydantic-settings.
|
||||
|
||||
Spec §9 lists every tunable. This module is the single read-point for them;
|
||||
callers that need runtime config should go through :func:`get_config` rather
|
||||
than ``os.environ``. The LRU cache makes the first call materialise + validate
|
||||
the full config and every subsequent call return the same instance.
|
||||
|
||||
Cache-clearing is public (``get_config.cache_clear()``) because tests need to
|
||||
re-read after ``monkeypatch.setenv``. Production code never clears the cache.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from functools import lru_cache
|
||||
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
|
||||
class AppConfig(BaseSettings):
|
||||
"""Typed view over the ``IX_*`` environment.
|
||||
|
||||
Field names drop the ``IX_`` prefix — pydantic-settings puts it back via
|
||||
``env_prefix``. Defaults match the spec exactly; do not change a default
|
||||
here without updating spec §9 in the same commit.
|
||||
"""
|
||||
|
||||
model_config = SettingsConfigDict(
|
||||
env_prefix="IX_",
|
||||
env_file=".env",
|
||||
env_file_encoding="utf-8",
|
||||
extra="ignore",
|
||||
)
|
||||
|
||||
# --- Job store ---
|
||||
postgres_url: str = (
|
||||
"postgresql+asyncpg://infoxtractor:<password>"
|
||||
"@host.docker.internal:5431/infoxtractor"
|
||||
)
|
||||
|
||||
# --- LLM backend ---
|
||||
ollama_url: str = "http://host.docker.internal:11434"
|
||||
default_model: str = "gpt-oss:20b"
|
||||
|
||||
# --- OCR ---
|
||||
ocr_engine: str = "surya"
|
||||
|
||||
# --- Pipeline behavior ---
|
||||
pipeline_worker_concurrency: int = 1
|
||||
pipeline_request_timeout_seconds: int = 2700
|
||||
genai_call_timeout_seconds: int = 1500
|
||||
render_max_pixels_per_page: int = 75_000_000
|
||||
|
||||
# --- File fetching ---
|
||||
tmp_dir: str = "/tmp/ix"
|
||||
file_max_bytes: int = 52_428_800
|
||||
file_connect_timeout_seconds: int = 10
|
||||
file_read_timeout_seconds: int = 30
|
||||
|
||||
# --- Transport / callbacks ---
|
||||
callback_timeout_seconds: int = 10
|
||||
|
||||
# --- Observability ---
|
||||
log_level: str = "INFO"
|
||||
|
||||
|
||||
@lru_cache(maxsize=1)
|
||||
def get_config() -> AppConfig:
|
||||
"""Return the process-wide :class:`AppConfig` (materialise on first call).
|
||||
|
||||
Wrapped in ``lru_cache`` so config is parsed + validated once per process.
|
||||
Tests call ``get_config.cache_clear()`` between scenarios; nothing in
|
||||
production should touch the cache.
|
||||
"""
|
||||
|
||||
return AppConfig()
|
||||
131
tests/unit/test_config.py
Normal file
131
tests/unit/test_config.py
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
"""Tests for :mod:`ix.config` — the pydantic-settings ``AppConfig``.
|
||||
|
||||
Guardrails we care about:
|
||||
|
||||
1. Every env var in spec §9 round-trips with the right type.
|
||||
2. Defaults match the spec exactly when no env is set.
|
||||
3. Unknown IX_ vars are ignored (``extra="ignore"``) so a typo doesn't crash
|
||||
the container at startup.
|
||||
4. ``get_config()`` is cached — same instance per process — and
|
||||
``get_config.cache_clear()`` rebuilds from the current environment (used by
|
||||
every test here to keep them independent of process state).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from ix.config import AppConfig, get_config
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_config_cache() -> None:
|
||||
"""Flush the LRU cache around every test.
|
||||
|
||||
Without this, tests that set env vars would see stale data from earlier
|
||||
runs because ``get_config()`` caches the first materialised instance.
|
||||
"""
|
||||
|
||||
get_config.cache_clear()
|
||||
|
||||
|
||||
def _clear_ix_env(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""Scrub every IX_* var so defaults surface predictably.
|
||||
|
||||
Tests that exercise env-based overrides still call ``monkeypatch.setenv``
|
||||
after this to dial in specific values; tests for defaults rely on this
|
||||
scrubbing so a developer's local ``.env`` can't contaminate the assertion.
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
for key in list(os.environ):
|
||||
if key.startswith("IX_"):
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
|
||||
|
||||
def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
_clear_ix_env(monkeypatch)
|
||||
# Don't let pydantic-settings pick up the repo's .env.example.
|
||||
cfg = AppConfig(_env_file=None) # type: ignore[call-arg]
|
||||
|
||||
assert cfg.postgres_url == (
|
||||
"postgresql+asyncpg://infoxtractor:<password>"
|
||||
"@host.docker.internal:5431/infoxtractor"
|
||||
)
|
||||
assert cfg.ollama_url == "http://host.docker.internal:11434"
|
||||
assert cfg.default_model == "gpt-oss:20b"
|
||||
assert cfg.ocr_engine == "surya"
|
||||
assert cfg.tmp_dir == "/tmp/ix"
|
||||
assert cfg.pipeline_worker_concurrency == 1
|
||||
assert cfg.pipeline_request_timeout_seconds == 2700
|
||||
assert cfg.genai_call_timeout_seconds == 1500
|
||||
assert cfg.file_max_bytes == 52428800
|
||||
assert cfg.file_connect_timeout_seconds == 10
|
||||
assert cfg.file_read_timeout_seconds == 30
|
||||
assert cfg.render_max_pixels_per_page == 75000000
|
||||
assert cfg.log_level == "INFO"
|
||||
assert cfg.callback_timeout_seconds == 10
|
||||
|
||||
|
||||
def test_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
_clear_ix_env(monkeypatch)
|
||||
monkeypatch.setenv("IX_POSTGRES_URL", "postgresql+asyncpg://u:p@db:5432/x")
|
||||
monkeypatch.setenv("IX_OLLAMA_URL", "http://llm:11434")
|
||||
monkeypatch.setenv("IX_DEFAULT_MODEL", "llama3:8b")
|
||||
monkeypatch.setenv("IX_PIPELINE_WORKER_CONCURRENCY", "4")
|
||||
monkeypatch.setenv("IX_GENAI_CALL_TIMEOUT_SECONDS", "60")
|
||||
monkeypatch.setenv("IX_LOG_LEVEL", "DEBUG")
|
||||
monkeypatch.setenv("IX_CALLBACK_TIMEOUT_SECONDS", "30")
|
||||
|
||||
cfg = AppConfig(_env_file=None) # type: ignore[call-arg]
|
||||
|
||||
assert cfg.postgres_url == "postgresql+asyncpg://u:p@db:5432/x"
|
||||
assert cfg.ollama_url == "http://llm:11434"
|
||||
assert cfg.default_model == "llama3:8b"
|
||||
assert cfg.pipeline_worker_concurrency == 4
|
||||
assert cfg.genai_call_timeout_seconds == 60
|
||||
assert cfg.log_level == "DEBUG"
|
||||
assert cfg.callback_timeout_seconds == 30
|
||||
|
||||
|
||||
def test_get_config_is_cached(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
_clear_ix_env(monkeypatch)
|
||||
monkeypatch.setenv("IX_POSTGRES_URL", "postgresql+asyncpg://a:b@c:5432/d1")
|
||||
first = get_config()
|
||||
# Later mutation must NOT be seen until cache_clear — this is a feature,
|
||||
# not a bug: config is process-level state, not per-call.
|
||||
monkeypatch.setenv("IX_POSTGRES_URL", "postgresql+asyncpg://a:b@c:5432/d2")
|
||||
second = get_config()
|
||||
assert first is second
|
||||
assert second.postgres_url.endswith("/d1")
|
||||
|
||||
get_config.cache_clear()
|
||||
third = get_config()
|
||||
assert third is not first
|
||||
assert third.postgres_url.endswith("/d2")
|
||||
|
||||
|
||||
def test_extra_env_keys_are_ignored(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""A typo'd IX_FOOBAR should not raise ValidationError at startup."""
|
||||
_clear_ix_env(monkeypatch)
|
||||
monkeypatch.setenv("IX_FOOBAR", "whatever")
|
||||
# Should not raise.
|
||||
cfg = AppConfig(_env_file=None) # type: ignore[call-arg]
|
||||
assert cfg.ollama_url.startswith("http://")
|
||||
|
||||
|
||||
def test_engine_uses_config_url(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""``ix.store.engine`` reads the URL through ``AppConfig``.
|
||||
|
||||
Task 3.2 refactors engine.py to go through ``get_config()`` instead of
|
||||
reading ``os.environ`` directly. We can't actually construct an async
|
||||
engine in a unit test (would need the DB), so we verify the resolution
|
||||
function exists and returns the configured URL.
|
||||
"""
|
||||
_clear_ix_env(monkeypatch)
|
||||
monkeypatch.setenv("IX_POSTGRES_URL", "postgresql+asyncpg://a:b@c:5432/d")
|
||||
|
||||
from ix.store.engine import _resolve_url
|
||||
|
||||
assert _resolve_url() == "postgresql+asyncpg://a:b@c:5432/d"
|
||||
Loading…
Reference in a new issue