chore(model): qwen3:14b default (#30)
All checks were successful
tests / test (push) Successful in 2m55s

unblock first deploy
This commit is contained in:
goldstein 2026-04-18 10:20:38 +00:00
commit a9e510362d
11 changed files with 19 additions and 19 deletions

View file

@@ -8,7 +8,7 @@ IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@host.docker.interna
# --- LLM backend ---------------------------------------------------------
IX_OLLAMA_URL=http://host.docker.internal:11434
IX_DEFAULT_MODEL=gpt-oss:20b
IX_DEFAULT_MODEL=qwen3:14b
# --- OCR -----------------------------------------------------------------
IX_OCR_ENGINE=surya

View file

@@ -25,7 +25,7 @@ Status: design phase. Full reference spec at `docs/spec-core-pipeline.md`. MVP s
- **Language**: Python 3.12, asyncio
- **Web/REST**: FastAPI + uvicorn
- **OCR (pluggable)**: Surya OCR first (GPU, shares RTX 3090 with Ollama / Immich ML)
- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `gpt-oss:20b`, configurable per use case
- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `qwen3:14b`, configurable per use case
- **State**: Postgres on the shared `postgis` container (:5431), new `infoxtractor` database
- **Deployment**: Docker, `git push server main` → post-receive rebuild (pattern from other apps)

View file

@@ -14,7 +14,7 @@ Mac (dev)
│ curl /healthz (60 s gate)
Docker container `infoxtractor` (port 8994)
├─ host.docker.internal:11434 → Ollama (gpt-oss:20b)
├─ host.docker.internal:11434 → Ollama (qwen3:14b)
└─ host.docker.internal:5431 → postgis (database `infoxtractor`)
```
@@ -32,7 +32,7 @@ The script:
2. Installs the `post-receive` hook (see `scripts/setup_server.sh` for the template).
3. Creates the `infoxtractor` Postgres role + database on the shared `postgis` container.
4. Writes `/home/server/Public/infoxtractor/app/.env` (mode 0600) from `.env.example` with the password substituted in.
5. Verifies `gpt-oss:20b` is pulled in Ollama.
5. Verifies `qwen3:14b` is pulled in Ollama.
6. Prints a hint to open UFW for port 8994 on the LAN subnet if it's missing.
After the script finishes, add the deploy remote to the local repo:
@@ -103,9 +103,9 @@ The `IX_DEFAULT_MODEL` env var on the server's `.env` must match something in `o
### If `/healthz` shows `ollama: degraded`
`gpt-oss:20b` (or the configured default) is not pulled. On the host:
`qwen3:14b` (or the configured default) is not pulled. On the host:
```bash
ssh server@192.168.68.42 "docker exec ollama ollama pull gpt-oss:20b"
ssh server@192.168.68.42 "docker exec ollama ollama pull qwen3:14b"
```
### If `/healthz` shows `ocr: fail`

View file

@@ -98,7 +98,7 @@ def post_job(base_url: str, file_url: str, client_id: str, request_id: str) -> d
paperless_text = (
"DKB\n"
"DE89370400440532013000\n"
"Statement period: 01.03.2026 – 31.03.2026\n"
"Statement period: 01.03.2026 - 31.03.2026\n"
"Opening balance: 1234.56 EUR\n"
"Closing balance: 1450.22 EUR\n"
"31.03.2026\n"

View file

@@ -9,7 +9,7 @@
# 2. Writes the post-receive hook (or updates it) and makes it executable.
# 3. Creates the Postgres role + database on the shared `postgis` container.
# 4. Writes `/home/server/Public/infoxtractor/app/.env` (0600) from .env.example.
# 5. Verifies `gpt-oss:20b` is pulled in Ollama.
# 5. Verifies `qwen3:14b` is pulled in Ollama.
set -euo pipefail
@@ -70,9 +70,9 @@ HOOK
chmod +x "${REPOS_GIT}/hooks/post-receive"
EOF
echo "==> 2/5 Verifying Ollama has gpt-oss:20b pulled"
if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'gpt-oss:20b'"; then
echo "FAIL: gpt-oss:20b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull gpt-oss:20b'" >&2
echo "==> 2/5 Verifying Ollama has qwen3:14b pulled"
if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'qwen3:14b'"; then
echo "FAIL: qwen3:14b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull qwen3:14b'" >&2
exit 1
fi

View file

@@ -40,7 +40,7 @@ class AppConfig(BaseSettings):
# --- LLM backend ---
ollama_url: str = "http://host.docker.internal:11434"
default_model: str = "gpt-oss:20b"
default_model: str = "qwen3:14b"
# --- OCR ---
ocr_engine: str = "surya"

View file

@@ -26,7 +26,7 @@ class Request(BaseModel):
model_config = ConfigDict(extra="forbid")
use_case_name: str = "Bank Statement Header"
default_model: str = "gpt-oss:20b"
default_model: str = "qwen3:14b"
system_prompt: str = (
"You extract header metadata from a single bank or credit-card statement. "
"Return only facts that appear in the document; leave a field null if uncertain. "

View file

@@ -5,7 +5,7 @@ Never runs in CI (Forgejo runner has no LAN access to Ollama). Run locally::
IX_TEST_OLLAMA=1 uv run pytest tests/live/test_ollama_client_live.py -v
Assumes the Ollama server at ``http://192.168.68.42:11434`` already has
``gpt-oss:20b`` pulled.
``qwen3:14b`` pulled.
"""
from __future__ import annotations
@@ -26,7 +26,7 @@ pytestmark = [
]
_OLLAMA_URL = "http://192.168.68.42:11434"
_MODEL = "gpt-oss:20b"
_MODEL = "qwen3:14b"
async def test_structured_output_round_trip() -> None:

View file

@@ -54,7 +54,7 @@ def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None:
"@host.docker.internal:5431/infoxtractor"
)
assert cfg.ollama_url == "http://host.docker.internal:11434"
assert cfg.default_model == "gpt-oss:20b"
assert cfg.default_model == "qwen3:14b"
assert cfg.ocr_engine == "surya"
assert cfg.tmp_dir == "/tmp/ix"
assert cfg.pipeline_worker_concurrency == 1

View file

@@ -363,8 +363,8 @@ class TestModelSelection:
req = _make_request(include_provenance=False)
resp = _response_with_segment_index(lines=["hello"])
await step.process(req, resp)
# use-case default is gpt-oss:20b
assert client.request_kwargs["model"] == "gpt-oss:20b" # type: ignore[index]
# use-case default is qwen3:14b
assert client.request_kwargs["model"] == "qwen3:14b" # type: ignore[index]
# ----------------------------------------------------------------------------

View file

@@ -12,7 +12,7 @@ class TestRequest:
def test_defaults(self) -> None:
r = Request()
assert r.use_case_name == "Bank Statement Header"
assert r.default_model == "gpt-oss:20b"
assert r.default_model == "qwen3:14b"
# Stable substring for agent/worker tests that want to confirm the
# prompt is what they think it is.
assert "extract header metadata" in r.system_prompt