From 5ee74f367ca8954ab1b4644090769f80054ac118 Mon Sep 17 00:00:00 2001 From: Dirk Riemann Date: Sat, 18 Apr 2026 12:20:23 +0200 Subject: [PATCH] chore(model): switch default IX_DEFAULT_MODEL to qwen3:14b (already on host) The home server's Ollama doesn't have gpt-oss:20b pulled; qwen3:14b is already there and is what mammon's chat agent uses. Switching the default now so the first deploy passes the /healthz ollama probe without an extra `ollama pull` step. The spec lists gpt-oss:20b as a concrete example; qwen3:14b is equally on-prem and Ollama-structured-output-compatible. Touched: AppConfig default, BankStatementHeader Request.default_model, .env.example, setup_server.sh ollama-list check, AGENTS.md, deployment.md, live tests. Unit tests that hard-coded the old model string but don't assert the default were left alone. Also: replace the en-dash with an ASCII hyphen in e2e_smoke.py Paperless-style text (ruff RUF001). Co-Authored-By: Claude Opus 4.7 (1M context) --- .env.example | 2 +- AGENTS.md | 2 +- docs/deployment.md | 8 ++++---- scripts/e2e_smoke.py | 2 +- scripts/setup_server.sh | 8 ++++---- src/ix/config.py | 2 +- src/ix/use_cases/bank_statement_header.py | 2 +- tests/live/test_ollama_client_live.py | 4 ++-- tests/unit/test_config.py | 2 +- tests/unit/test_genai_step.py | 4 ++-- tests/unit/test_use_case_bank_statement_header.py | 2 +- 11 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.env.example b/.env.example index 5f3d5c3..e3ccb54 100644 --- a/.env.example +++ b/.env.example @@ -8,7 +8,7 @@ IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:@host.docker.interna # --- LLM backend --------------------------------------------------------- IX_OLLAMA_URL=http://host.docker.internal:11434 -IX_DEFAULT_MODEL=gpt-oss:20b +IX_DEFAULT_MODEL=qwen3:14b # --- OCR ----------------------------------------------------------------- IX_OCR_ENGINE=surya diff --git a/AGENTS.md b/AGENTS.md index 88db862..7a417f3 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -25,7 +25,7 @@ Status: design phase. 
Full reference spec at `docs/spec-core-pipeline.md`. MVP s - **Language**: Python 3.12, asyncio - **Web/REST**: FastAPI + uvicorn - **OCR (pluggable)**: Surya OCR first (GPU, shares RTX 3090 with Ollama / Immich ML) -- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `gpt-oss:20b`, configurable per use case +- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `qwen3:14b`, configurable per use case - **State**: Postgres on the shared `postgis` container (:5431), new `infoxtractor` database - **Deployment**: Docker, `git push server main` → post-receive rebuild (pattern from other apps) diff --git a/docs/deployment.md b/docs/deployment.md index c055c80..5f34108 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -14,7 +14,7 @@ Mac (dev) │ curl /healthz (60 s gate) ▼ Docker container `infoxtractor` (port 8994) - ├─ host.docker.internal:11434 → Ollama (gpt-oss:20b) + ├─ host.docker.internal:11434 → Ollama (qwen3:14b) └─ host.docker.internal:5431 → postgis (database `infoxtractor`) ``` @@ -32,7 +32,7 @@ The script: 2. Installs the `post-receive` hook (see `scripts/setup_server.sh` for the template). 3. Creates the `infoxtractor` Postgres role + database on the shared `postgis` container. 4. Writes `/home/server/Public/infoxtractor/app/.env` (mode 0600) from `.env.example` with the password substituted in. -5. Verifies `gpt-oss:20b` is pulled in Ollama. +5. Verifies `qwen3:14b` is pulled in Ollama. 6. Prints a hint to open UFW for port 8994 on the LAN subnet if it's missing. After the script finishes, add the deploy remote to the local repo: @@ -103,9 +103,9 @@ The `IX_DEFAULT_MODEL` env var on the server's `.env` must match something in `o ### If `/healthz` shows `ollama: degraded` -`gpt-oss:20b` (or the configured default) is not pulled. On the host: +`qwen3:14b` (or the configured default) is not pulled. 
On the host: ```bash -ssh server@192.168.68.42 "docker exec ollama ollama pull gpt-oss:20b" +ssh server@192.168.68.42 "docker exec ollama ollama pull qwen3:14b" ``` ### If `/healthz` shows `ocr: fail` diff --git a/scripts/e2e_smoke.py b/scripts/e2e_smoke.py index df9f891..4ef7597 100755 --- a/scripts/e2e_smoke.py +++ b/scripts/e2e_smoke.py @@ -98,7 +98,7 @@ def post_job(base_url: str, file_url: str, client_id: str, request_id: str) -> d paperless_text = ( "DKB\n" "DE89370400440532013000\n" - "Statement period: 01.03.2026 – 31.03.2026\n" + "Statement period: 01.03.2026 - 31.03.2026\n" "Opening balance: 1234.56 EUR\n" "Closing balance: 1450.22 EUR\n" "31.03.2026\n" diff --git a/scripts/setup_server.sh b/scripts/setup_server.sh index f4b4b14..eca59d5 100755 --- a/scripts/setup_server.sh +++ b/scripts/setup_server.sh @@ -9,7 +9,7 @@ # 2. Writes the post-receive hook (or updates it) and makes it executable. # 3. Creates the Postgres role + database on the shared `postgis` container. # 4. Writes `/home/server/Public/infoxtractor/app/.env` (0600) from .env.example. -# 5. Verifies `gpt-oss:20b` is pulled in Ollama. +# 5. Verifies `qwen3:14b` is pulled in Ollama. set -euo pipefail @@ -70,9 +70,9 @@ HOOK chmod +x "${REPOS_GIT}/hooks/post-receive" EOF -echo "==> 2/5 Verifying Ollama has gpt-oss:20b pulled" -if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'gpt-oss:20b'"; then - echo "FAIL: gpt-oss:20b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull gpt-oss:20b'" >&2 +echo "==> 2/5 Verifying Ollama has qwen3:14b pulled" +if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'qwen3:14b'"; then + echo "FAIL: qwen3:14b not found in Ollama. 
Run: ssh ${SERVER} 'docker exec ollama ollama pull qwen3:14b'" >&2 exit 1 fi diff --git a/src/ix/config.py b/src/ix/config.py index 9740633..9f12ae7 100644 --- a/src/ix/config.py +++ b/src/ix/config.py @@ -40,7 +40,7 @@ class AppConfig(BaseSettings): # --- LLM backend --- ollama_url: str = "http://host.docker.internal:11434" - default_model: str = "gpt-oss:20b" + default_model: str = "qwen3:14b" # --- OCR --- ocr_engine: str = "surya" diff --git a/src/ix/use_cases/bank_statement_header.py b/src/ix/use_cases/bank_statement_header.py index bfd0198..4e83ed9 100644 --- a/src/ix/use_cases/bank_statement_header.py +++ b/src/ix/use_cases/bank_statement_header.py @@ -26,7 +26,7 @@ class Request(BaseModel): model_config = ConfigDict(extra="forbid") use_case_name: str = "Bank Statement Header" - default_model: str = "gpt-oss:20b" + default_model: str = "qwen3:14b" system_prompt: str = ( "You extract header metadata from a single bank or credit-card statement. " "Return only facts that appear in the document; leave a field null if uncertain. " diff --git a/tests/live/test_ollama_client_live.py b/tests/live/test_ollama_client_live.py index dd9eb26..f75abad 100644 --- a/tests/live/test_ollama_client_live.py +++ b/tests/live/test_ollama_client_live.py @@ -5,7 +5,7 @@ Never runs in CI (Forgejo runner has no LAN access to Ollama). Run locally:: IX_TEST_OLLAMA=1 uv run pytest tests/live/test_ollama_client_live.py -v Assumes the Ollama server at ``http://192.168.68.42:11434`` already has -``gpt-oss:20b`` pulled. +``qwen3:14b`` pulled. 
""" from __future__ import annotations @@ -26,7 +26,7 @@ pytestmark = [ ] _OLLAMA_URL = "http://192.168.68.42:11434" -_MODEL = "gpt-oss:20b" +_MODEL = "qwen3:14b" async def test_structured_output_round_trip() -> None: diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 7a2d5fb..5ebbb4d 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -54,7 +54,7 @@ def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None: "@host.docker.internal:5431/infoxtractor" ) assert cfg.ollama_url == "http://host.docker.internal:11434" - assert cfg.default_model == "gpt-oss:20b" + assert cfg.default_model == "qwen3:14b" assert cfg.ocr_engine == "surya" assert cfg.tmp_dir == "/tmp/ix" assert cfg.pipeline_worker_concurrency == 1 diff --git a/tests/unit/test_genai_step.py b/tests/unit/test_genai_step.py index ec959f2..b118cae 100644 --- a/tests/unit/test_genai_step.py +++ b/tests/unit/test_genai_step.py @@ -363,8 +363,8 @@ class TestModelSelection: req = _make_request(include_provenance=False) resp = _response_with_segment_index(lines=["hello"]) await step.process(req, resp) - # use-case default is gpt-oss:20b - assert client.request_kwargs["model"] == "gpt-oss:20b" # type: ignore[index] + # use-case default is qwen3:14b + assert client.request_kwargs["model"] == "qwen3:14b" # type: ignore[index] # ---------------------------------------------------------------------------- diff --git a/tests/unit/test_use_case_bank_statement_header.py b/tests/unit/test_use_case_bank_statement_header.py index e904aa9..2eba2bf 100644 --- a/tests/unit/test_use_case_bank_statement_header.py +++ b/tests/unit/test_use_case_bank_statement_header.py @@ -12,7 +12,7 @@ class TestRequest: def test_defaults(self) -> None: r = Request() assert r.use_case_name == "Bank Statement Header" - assert r.default_model == "gpt-oss:20b" + assert r.default_model == "qwen3:14b" # Stable substring for agent/worker tests that want to confirm the # prompt is what they think it is. 
assert "extract header metadata" in r.system_prompt