infoxtractor/tests/unit/test_config.py
Dirk Riemann 95728accbf
All checks were successful
tests / test (push) Successful in 1m1s
tests / test (pull_request) Successful in 58s
feat(config): AppConfig + cached get_config() (spec §9)
Typed pydantic-settings view over every IX_* env var, defaults matching
spec §9 exactly. @lru_cache-wrapped accessor so parsing/validation happens
once per process; tests clear the cache via get_config.cache_clear().

extra="ignore" keeps the container robust against typo'd env vars in
production .env files. engine.py's URL resolver now goes through
get_config() when ix.config is importable (bootstrap fallback remains so
hypothetical early-import callers don't crash).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 11:38:44 +02:00

131 lines
4.9 KiB
Python

"""Tests for :mod:`ix.config` — the pydantic-settings ``AppConfig``.
Guardrails we care about:
1. Every env var in spec §9 round-trips with the right type.
2. Defaults match the spec exactly when no env is set.
3. Unknown IX_ vars are ignored (``extra="ignore"``) so a typo doesn't crash
the container at startup.
4. ``get_config()`` is cached — same instance per process — and
``get_config.cache_clear()`` rebuilds from the current environment (used by
every test here to keep them independent of process state).
"""
from __future__ import annotations

from collections.abc import Iterator

import pytest

from ix.config import AppConfig, get_config
@pytest.fixture(autouse=True)
def _reset_config_cache() -> Iterator[None]:
    """Flush the ``get_config()`` LRU cache before and after every test.

    ``get_config()`` caches the first materialised instance, so the cache is
    cleared on both sides of the test body:

    * before, so a test never sees an instance cached by an earlier test;
    * after, so an instance built from this test's monkeypatched environment
      does not stay cached for whatever calls ``get_config()`` next.

    Yields:
        Nothing; the fixture exists purely for its setup/teardown side effect.
    """
    get_config.cache_clear()
    yield
    # Teardown: drop whatever this test cached so it cannot outlive the test.
    get_config.cache_clear()
def _clear_ix_env(monkeypatch: pytest.MonkeyPatch) -> None:
    """Remove every ``IX_``-prefixed variable from the process environment.

    Default-value tests depend on this so that variables already present in
    the developer's environment cannot leak into the assertions. Override
    tests call it first and then ``monkeypatch.setenv`` the specific values
    they want to exercise.

    Args:
        monkeypatch: pytest's monkeypatch fixture; deletions go through it so
            they are undone when the test finishes.
    """
    import os

    # Snapshot the matching names first: the environment is modified below.
    ix_keys = [name for name in os.environ if name.startswith("IX_")]
    for name in ix_keys:
        monkeypatch.delenv(name, raising=False)
def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None:
    """With no ``IX_*`` variables set, every field holds its default value."""
    _clear_ix_env(monkeypatch)

    # Field name -> expected default, checked in this order.
    expected_defaults = {
        "postgres_url": (
            "postgresql+asyncpg://infoxtractor:<password>"
            "@host.docker.internal:5431/infoxtractor"
        ),
        "ollama_url": "http://host.docker.internal:11434",
        "default_model": "gpt-oss:20b",
        "ocr_engine": "surya",
        "tmp_dir": "/tmp/ix",
        "pipeline_worker_concurrency": 1,
        "pipeline_request_timeout_seconds": 2700,
        "genai_call_timeout_seconds": 1500,
        "file_max_bytes": 52428800,
        "file_connect_timeout_seconds": 10,
        "file_read_timeout_seconds": 30,
        "render_max_pixels_per_page": 75000000,
        "log_level": "INFO",
        "callback_timeout_seconds": 10,
    }

    # Don't let pydantic-settings pick up the repo's .env.example.
    settings = AppConfig(_env_file=None)  # type: ignore[call-arg]

    for field_name, default in expected_defaults.items():
        assert getattr(settings, field_name) == default
def test_env_overrides(monkeypatch: pytest.MonkeyPatch) -> None:
    """Values supplied through ``IX_*`` variables replace the defaults.

    Integer fields are set as strings in the environment and must come back
    as ``int`` on the config object.
    """
    _clear_ix_env(monkeypatch)

    # Environment variable -> raw string value placed in the environment.
    env_overrides = {
        "IX_POSTGRES_URL": "postgresql+asyncpg://u:p@db:5432/x",
        "IX_OLLAMA_URL": "http://llm:11434",
        "IX_DEFAULT_MODEL": "llama3:8b",
        "IX_PIPELINE_WORKER_CONCURRENCY": "4",
        "IX_GENAI_CALL_TIMEOUT_SECONDS": "60",
        "IX_LOG_LEVEL": "DEBUG",
        "IX_CALLBACK_TIMEOUT_SECONDS": "30",
    }
    for env_name, raw_value in env_overrides.items():
        monkeypatch.setenv(env_name, raw_value)

    settings = AppConfig(_env_file=None)  # type: ignore[call-arg]

    # Field name -> value expected after parsing.
    expected_values = {
        "postgres_url": "postgresql+asyncpg://u:p@db:5432/x",
        "ollama_url": "http://llm:11434",
        "default_model": "llama3:8b",
        "pipeline_worker_concurrency": 4,
        "genai_call_timeout_seconds": 60,
        "log_level": "DEBUG",
        "callback_timeout_seconds": 30,
    }
    for field_name, parsed in expected_values.items():
        assert getattr(settings, field_name) == parsed
def test_get_config_is_cached(monkeypatch: pytest.MonkeyPatch) -> None:
    """``get_config()`` returns one cached instance until ``cache_clear()``."""
    _clear_ix_env(monkeypatch)
    url_before = "postgresql+asyncpg://a:b@c:5432/d1"
    url_after = "postgresql+asyncpg://a:b@c:5432/d2"

    monkeypatch.setenv("IX_POSTGRES_URL", url_before)
    initial = get_config()

    # Changing the environment afterwards must stay invisible while the cache
    # is warm: config is process-level state, not recomputed per call.
    monkeypatch.setenv("IX_POSTGRES_URL", url_after)
    repeat = get_config()
    assert initial is repeat
    assert repeat.postgres_url.endswith("/d1")

    # Once the cache is cleared, a new instance reflects the current env.
    get_config.cache_clear()
    rebuilt = get_config()
    assert rebuilt is not initial
    assert rebuilt.postgres_url.endswith("/d2")
def test_extra_env_keys_are_ignored(monkeypatch: pytest.MonkeyPatch) -> None:
    """An unrecognised ``IX_*`` variable must not make construction fail."""
    _clear_ix_env(monkeypatch)
    monkeypatch.setenv("IX_FOOBAR", "whatever")

    # Building the config is the real check: it has to succeed without raising.
    settings = AppConfig(_env_file=None)  # type: ignore[call-arg]

    assert settings.ollama_url.startswith("http://")
def test_engine_uses_config_url(monkeypatch: pytest.MonkeyPatch) -> None:
    """``ix.store.engine._resolve_url`` returns the configured Postgres URL.

    Building a real async engine would need a database, which a unit test
    does not have, so only the URL-resolution helper is exercised: with
    ``IX_POSTGRES_URL`` set, it must hand back exactly that value.
    """
    _clear_ix_env(monkeypatch)
    configured_url = "postgresql+asyncpg://a:b@c:5432/d"
    monkeypatch.setenv("IX_POSTGRES_URL", configured_url)

    # Imported only after the environment has been prepared.
    from ix.store.engine import _resolve_url

    resolved = _resolve_url()
    assert resolved == configured_url