chore(model): qwen3:14b default (#30)

Switch the default model from gpt-oss:20b to qwen3:14b to unblock the first deploy.
2026-04-18 10:20:38 +00:00 · 2026-04-18 10:20:38 +00:00 · a9e510362d
commit a9e510362d
parent f6cc99f062 5ee74f367c
11 changed files with 19 additions and 19 deletions
--- a/.env.example
+++ b/.env.example
@@ -8,7 +8,7 @@ IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@host.docker.interna

 # --- LLM backend ---------------------------------------------------------
 IX_OLLAMA_URL=http://host.docker.internal:11434
-IX_DEFAULT_MODEL=gpt-oss:20b
+IX_DEFAULT_MODEL=qwen3:14b

 # --- OCR -----------------------------------------------------------------
 IX_OCR_ENGINE=surya
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -25,7 +25,7 @@ Status: design phase. Full reference spec at `docs/spec-core-pipeline.md`. MVP s
 - **Language**: Python 3.12, asyncio
 - **Web/REST**: FastAPI + uvicorn
 - **OCR (pluggable)**: Surya OCR first (GPU, shares RTX 3090 with Ollama / Immich ML)
-- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `gpt-oss:20b`, configurable per use case
+- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `qwen3:14b`, configurable per use case
 - **State**: Postgres on the shared `postgis` container (:5431), new `infoxtractor` database
 - **Deployment**: Docker, `git push server main` → post-receive rebuild (pattern from other apps)

--- a/docs/deployment.md
+++ b/docs/deployment.md
@@ -14,7 +14,7 @@ Mac (dev)
  │                 curl /healthz (60 s gate)
  ▼
 Docker container `infoxtractor` (port 8994)
-  ├─ host.docker.internal:11434  →  Ollama (gpt-oss:20b)
+  ├─ host.docker.internal:11434  →  Ollama (qwen3:14b)
  └─ host.docker.internal:5431   →  postgis (database `infoxtractor`)
 ```

@@ -32,7 +32,7 @@ The script:
 2. Installs the `post-receive` hook (see `scripts/setup_server.sh` for the template).
 3. Creates the `infoxtractor` Postgres role + database on the shared `postgis` container.
 4. Writes `/home/server/Public/infoxtractor/app/.env` (mode 0600) from `.env.example` with the password substituted in.
-5. Verifies `gpt-oss:20b` is pulled in Ollama.
+5. Verifies `qwen3:14b` is pulled in Ollama.
 6. Prints a hint to open UFW for port 8994 on the LAN subnet if it's missing.

 After the script finishes, add the deploy remote to the local repo:
@@ -103,9 +103,9 @@ The `IX_DEFAULT_MODEL` env var on the server's `.env` must match something in `o

 ### If `/healthz` shows `ollama: degraded`

-`gpt-oss:20b` (or the configured default) is not pulled. On the host:
+`qwen3:14b` (or the configured default) is not pulled. On the host:
 ```bash
-ssh server@192.168.68.42 "docker exec ollama ollama pull gpt-oss:20b"
+ssh server@192.168.68.42 "docker exec ollama ollama pull qwen3:14b"
 ```

 ### If `/healthz` shows `ocr: fail`
--- a/scripts/e2e_smoke.py
+++ b/scripts/e2e_smoke.py
@@ -98,7 +98,7 @@ def post_job(base_url: str, file_url: str, client_id: str, request_id: str) -> d
    paperless_text = (
        "DKB\n"
        "DE89370400440532013000\n"
-        "Statement period: 01.03.2026 – 31.03.2026\n"
+        "Statement period: 01.03.2026 - 31.03.2026\n"
        "Opening balance: 1234.56 EUR\n"
        "Closing balance: 1450.22 EUR\n"
        "31.03.2026\n"
--- a/scripts/setup_server.sh
+++ b/scripts/setup_server.sh
@@ -9,7 +9,7 @@
 #   2. Writes the post-receive hook (or updates it) and makes it executable.
 #   3. Creates the Postgres role + database on the shared `postgis` container.
 #   4. Writes `/home/server/Public/infoxtractor/app/.env` (0600) from .env.example.
-#   5. Verifies `gpt-oss:20b` is pulled in Ollama.
+#   5. Verifies `qwen3:14b` is pulled in Ollama.

 set -euo pipefail

@@ -70,9 +70,9 @@ HOOK
 chmod +x "${REPOS_GIT}/hooks/post-receive"
 EOF

-echo "==> 2/5  Verifying Ollama has gpt-oss:20b pulled"
-if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'gpt-oss:20b'"; then
-  echo "FAIL: gpt-oss:20b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull gpt-oss:20b'" >&2
+echo "==> 2/5  Verifying Ollama has qwen3:14b pulled"
+if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'qwen3:14b'"; then
+  echo "FAIL: qwen3:14b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull qwen3:14b'" >&2
  exit 1
 fi

--- a/src/ix/config.py
+++ b/src/ix/config.py
@@ -40,7 +40,7 @@ class AppConfig(BaseSettings):

    # --- LLM backend ---
    ollama_url: str = "http://host.docker.internal:11434"
-    default_model: str = "gpt-oss:20b"
+    default_model: str = "qwen3:14b"

    # --- OCR ---
    ocr_engine: str = "surya"
--- a/src/ix/use_cases/bank_statement_header.py
+++ b/src/ix/use_cases/bank_statement_header.py
@@ -26,7 +26,7 @@ class Request(BaseModel):
    model_config = ConfigDict(extra="forbid")

    use_case_name: str = "Bank Statement Header"
-    default_model: str = "gpt-oss:20b"
+    default_model: str = "qwen3:14b"
    system_prompt: str = (
        "You extract header metadata from a single bank or credit-card statement. "
        "Return only facts that appear in the document; leave a field null if uncertain. "
--- a/tests/live/test_ollama_client_live.py
+++ b/tests/live/test_ollama_client_live.py
@@ -5,7 +5,7 @@ Never runs in CI (Forgejo runner has no LAN access to Ollama). Run locally::
    IX_TEST_OLLAMA=1 uv run pytest tests/live/test_ollama_client_live.py -v

 Assumes the Ollama server at ``http://192.168.68.42:11434`` already has
-``gpt-oss:20b`` pulled.
+``qwen3:14b`` pulled.
 """

 from __future__ import annotations
@@ -26,7 +26,7 @@ pytestmark = [
 ]

 _OLLAMA_URL = "http://192.168.68.42:11434"
-_MODEL = "gpt-oss:20b"
+_MODEL = "qwen3:14b"


 async def test_structured_output_round_trip() -> None:
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -54,7 +54,7 @@ def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None:
        "@host.docker.internal:5431/infoxtractor"
    )
    assert cfg.ollama_url == "http://host.docker.internal:11434"
-    assert cfg.default_model == "gpt-oss:20b"
+    assert cfg.default_model == "qwen3:14b"
    assert cfg.ocr_engine == "surya"
    assert cfg.tmp_dir == "/tmp/ix"
    assert cfg.pipeline_worker_concurrency == 1
--- a/tests/unit/test_genai_step.py
+++ b/tests/unit/test_genai_step.py
@@ -363,8 +363,8 @@ class TestModelSelection:
        req = _make_request(include_provenance=False)
        resp = _response_with_segment_index(lines=["hello"])
        await step.process(req, resp)
-        # use-case default is gpt-oss:20b
-        assert client.request_kwargs["model"] == "gpt-oss:20b"  # type: ignore[index]
+        # use-case default is qwen3:14b
+        assert client.request_kwargs["model"] == "qwen3:14b"  # type: ignore[index]


 # ----------------------------------------------------------------------------
--- a/tests/unit/test_use_case_bank_statement_header.py
+++ b/tests/unit/test_use_case_bank_statement_header.py
@@ -12,7 +12,7 @@ class TestRequest:
    def test_defaults(self) -> None:
        r = Request()
        assert r.use_case_name == "Bank Statement Header"
-        assert r.default_model == "gpt-oss:20b"
+        assert r.default_model == "qwen3:14b"
        # Stable substring for agent/worker tests that want to confirm the
        # prompt is what they think it is.
        assert "extract header metadata" in r.system_prompt