From 5ee74f367ca8954ab1b4644090769f80054ac118 Mon Sep 17 00:00:00 2001
From: Dirk Riemann <ditori@gmail.com>
Date: Sat, 18 Apr 2026 12:20:23 +0200
Subject: [PATCH] chore(model): switch default IX_DEFAULT_MODEL to qwen3:14b
 (already on host)

The home server's Ollama doesn't have gpt-oss:20b pulled; qwen3:14b is
already there and is what mammon's chat agent uses. Switching the default
now so the first deploy passes the /healthz ollama probe without an extra
`ollama pull` step. The spec lists gpt-oss:20b as a concrete example;
qwen3:14b is equally on-prem and Ollama-structured-output-compatible.

Touched: AppConfig default, BankStatementHeader Request.default_model,
.env.example, setup_server.sh ollama-list check, AGENTS.md, deployment.md,
live tests, and the unit tests that assert the default. Unit tests that
hard-coded the old model string but don't assert the default were left
alone.

Also: replace the en-dash with an ASCII hyphen in the e2e_smoke.py
Paperless-style text (ruff RUF001).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .env.example                                      | 2 +-
 AGENTS.md                                         | 2 +-
 docs/deployment.md                                | 8 ++++----
 scripts/e2e_smoke.py                              | 2 +-
 scripts/setup_server.sh                           | 8 ++++----
 src/ix/config.py                                  | 2 +-
 src/ix/use_cases/bank_statement_header.py         | 2 +-
 tests/live/test_ollama_client_live.py             | 4 ++--
 tests/unit/test_config.py                         | 2 +-
 tests/unit/test_genai_step.py                     | 4 ++--
 tests/unit/test_use_case_bank_statement_header.py | 2 +-
 11 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/.env.example b/.env.example
index 5f3d5c3..e3ccb54 100644
--- a/.env.example
+++ b/.env.example
@@ -8,7 +8,7 @@ IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@host.docker.interna
 
 # --- LLM backend ---------------------------------------------------------
 IX_OLLAMA_URL=http://host.docker.internal:11434
-IX_DEFAULT_MODEL=gpt-oss:20b
+IX_DEFAULT_MODEL=qwen3:14b
 
 # --- OCR -----------------------------------------------------------------
 IX_OCR_ENGINE=surya
diff --git a/AGENTS.md b/AGENTS.md
index 88db862..7a417f3 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -25,7 +25,7 @@ Status: design phase. Full reference spec at `docs/spec-core-pipeline.md`. MVP s
 - **Language**: Python 3.12, asyncio
 - **Web/REST**: FastAPI + uvicorn
 - **OCR (pluggable)**: Surya OCR first (GPU, shares RTX 3090 with Ollama / Immich ML)
-- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `gpt-oss:20b`, configurable per use case
+- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `qwen3:14b`, configurable per use case
 - **State**: Postgres on the shared `postgis` container (:5431), new `infoxtractor` database
 - **Deployment**: Docker, `git push server main` → post-receive rebuild (pattern from other apps)
 
diff --git a/docs/deployment.md b/docs/deployment.md
index c055c80..5f34108 100644
--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -14,7 +14,7 @@ Mac (dev)
   │                 curl /healthz (60 s gate)
   ▼
 Docker container `infoxtractor` (port 8994)
-  ├─ host.docker.internal:11434  →  Ollama (gpt-oss:20b)
+  ├─ host.docker.internal:11434  →  Ollama (qwen3:14b)
   └─ host.docker.internal:5431   →  postgis (database `infoxtractor`)
 ```
 
@@ -32,7 +32,7 @@ The script:
 2. Installs the `post-receive` hook (see `scripts/setup_server.sh` for the template).
 3. Creates the `infoxtractor` Postgres role + database on the shared `postgis` container.
 4. Writes `/home/server/Public/infoxtractor/app/.env` (mode 0600) from `.env.example` with the password substituted in.
-5. Verifies `gpt-oss:20b` is pulled in Ollama.
+5. Verifies `qwen3:14b` is pulled in Ollama.
 6. Prints a hint to open UFW for port 8994 on the LAN subnet if it's missing.
 
 After the script finishes, add the deploy remote to the local repo:
@@ -103,9 +103,9 @@ The `IX_DEFAULT_MODEL` env var on the server's `.env` must match something in `o
 
 ### If `/healthz` shows `ollama: degraded`
 
-`gpt-oss:20b` (or the configured default) is not pulled. On the host:
+`qwen3:14b` (or the configured default) is not pulled. On the host:
 ```bash
-ssh server@192.168.68.42 "docker exec ollama ollama pull gpt-oss:20b"
+ssh server@192.168.68.42 "docker exec ollama ollama pull qwen3:14b"
 ```
 
 ### If `/healthz` shows `ocr: fail`
diff --git a/scripts/e2e_smoke.py b/scripts/e2e_smoke.py
index df9f891..4ef7597 100755
--- a/scripts/e2e_smoke.py
+++ b/scripts/e2e_smoke.py
@@ -98,7 +98,7 @@ def post_job(base_url: str, file_url: str, client_id: str, request_id: str) -> d
     paperless_text = (
         "DKB\n"
         "DE89370400440532013000\n"
-        "Statement period: 01.03.2026 – 31.03.2026\n"
+        "Statement period: 01.03.2026 - 31.03.2026\n"
         "Opening balance: 1234.56 EUR\n"
         "Closing balance: 1450.22 EUR\n"
         "31.03.2026\n"
diff --git a/scripts/setup_server.sh b/scripts/setup_server.sh
index f4b4b14..eca59d5 100755
--- a/scripts/setup_server.sh
+++ b/scripts/setup_server.sh
@@ -9,7 +9,7 @@
 #   2. Writes the post-receive hook (or updates it) and makes it executable.
 #   3. Creates the Postgres role + database on the shared `postgis` container.
 #   4. Writes `/home/server/Public/infoxtractor/app/.env` (0600) from .env.example.
-#   5. Verifies `gpt-oss:20b` is pulled in Ollama.
+#   5. Verifies `qwen3:14b` is pulled in Ollama.
 
 set -euo pipefail
 
@@ -70,9 +70,9 @@ HOOK
 chmod +x "${REPOS_GIT}/hooks/post-receive"
 EOF
 
-echo "==> 2/5  Verifying Ollama has gpt-oss:20b pulled"
-if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'gpt-oss:20b'"; then
-  echo "FAIL: gpt-oss:20b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull gpt-oss:20b'" >&2
+echo "==> 2/5  Verifying Ollama has qwen3:14b pulled"
+if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'qwen3:14b'"; then
+  echo "FAIL: qwen3:14b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull qwen3:14b'" >&2
   exit 1
 fi
 
diff --git a/src/ix/config.py b/src/ix/config.py
index 9740633..9f12ae7 100644
--- a/src/ix/config.py
+++ b/src/ix/config.py
@@ -40,7 +40,7 @@ class AppConfig(BaseSettings):
 
     # --- LLM backend ---
     ollama_url: str = "http://host.docker.internal:11434"
-    default_model: str = "gpt-oss:20b"
+    default_model: str = "qwen3:14b"
 
     # --- OCR ---
     ocr_engine: str = "surya"
diff --git a/src/ix/use_cases/bank_statement_header.py b/src/ix/use_cases/bank_statement_header.py
index bfd0198..4e83ed9 100644
--- a/src/ix/use_cases/bank_statement_header.py
+++ b/src/ix/use_cases/bank_statement_header.py
@@ -26,7 +26,7 @@ class Request(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     use_case_name: str = "Bank Statement Header"
-    default_model: str = "gpt-oss:20b"
+    default_model: str = "qwen3:14b"
     system_prompt: str = (
         "You extract header metadata from a single bank or credit-card statement. "
         "Return only facts that appear in the document; leave a field null if uncertain. "
diff --git a/tests/live/test_ollama_client_live.py b/tests/live/test_ollama_client_live.py
index dd9eb26..f75abad 100644
--- a/tests/live/test_ollama_client_live.py
+++ b/tests/live/test_ollama_client_live.py
@@ -5,7 +5,7 @@ Never runs in CI (Forgejo runner has no LAN access to Ollama). Run locally::
     IX_TEST_OLLAMA=1 uv run pytest tests/live/test_ollama_client_live.py -v
 
 Assumes the Ollama server at ``http://192.168.68.42:11434`` already has
-``gpt-oss:20b`` pulled.
+``qwen3:14b`` pulled.
 """
 
 from __future__ import annotations
@@ -26,7 +26,7 @@ pytestmark = [
 ]
 
 _OLLAMA_URL = "http://192.168.68.42:11434"
-_MODEL = "gpt-oss:20b"
+_MODEL = "qwen3:14b"
 
 
 async def test_structured_output_round_trip() -> None:
diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py
index 7a2d5fb..5ebbb4d 100644
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -54,7 +54,7 @@ def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None:
         "@host.docker.internal:5431/infoxtractor"
     )
     assert cfg.ollama_url == "http://host.docker.internal:11434"
-    assert cfg.default_model == "gpt-oss:20b"
+    assert cfg.default_model == "qwen3:14b"
     assert cfg.ocr_engine == "surya"
     assert cfg.tmp_dir == "/tmp/ix"
     assert cfg.pipeline_worker_concurrency == 1
diff --git a/tests/unit/test_genai_step.py b/tests/unit/test_genai_step.py
index ec959f2..b118cae 100644
--- a/tests/unit/test_genai_step.py
+++ b/tests/unit/test_genai_step.py
@@ -363,8 +363,8 @@ class TestModelSelection:
         req = _make_request(include_provenance=False)
         resp = _response_with_segment_index(lines=["hello"])
         await step.process(req, resp)
-        # use-case default is gpt-oss:20b
-        assert client.request_kwargs["model"] == "gpt-oss:20b"  # type: ignore[index]
+        # use-case default is qwen3:14b
+        assert client.request_kwargs["model"] == "qwen3:14b"  # type: ignore[index]
 
 
 # ----------------------------------------------------------------------------
diff --git a/tests/unit/test_use_case_bank_statement_header.py b/tests/unit/test_use_case_bank_statement_header.py
index e904aa9..2eba2bf 100644
--- a/tests/unit/test_use_case_bank_statement_header.py
+++ b/tests/unit/test_use_case_bank_statement_header.py
@@ -12,7 +12,7 @@ class TestRequest:
     def test_defaults(self) -> None:
         r = Request()
         assert r.use_case_name == "Bank Statement Header"
-        assert r.default_model == "gpt-oss:20b"
+        assert r.default_model == "qwen3:14b"
         # Stable substring for agent/worker tests that want to confirm the
         # prompt is what they think it is.
         assert "extract header metadata" in r.system_prompt