Compare commits

...

2 commits

Author SHA1 Message Date
a9e510362d chore(model): qwen3:14b default (#30)
All checks were successful
tests / test (push) Successful in 2m55s
unblock first deploy
2026-04-18 10:20:38 +00:00
5ee74f367c chore(model): switch default IX_DEFAULT_MODEL to qwen3:14b (already on host)
All checks were successful
tests / test (push) Successful in 1m52s
tests / test (pull_request) Successful in 1m45s
The home server's Ollama doesn't have gpt-oss:20b pulled; qwen3:14b is
already there and is what mammon's chat agent uses. Switching the default
now so the first deploy passes the /healthz ollama probe without an extra
`ollama pull` step. The spec lists gpt-oss:20b as a concrete example;
qwen3:14b is equally on-prem and Ollama-structured-output-compatible.

Touched: AppConfig default, BankStatementHeader Request.default_model,
.env.example, setup_server.sh ollama-list check, AGENTS.md, deployment.md,
live tests. Unit tests that hard-coded the old model string but don't
assert the default were left alone.

Also: ASCII en-dash in e2e_smoke.py Paperless-style text (ruff RUF001).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 12:20:23 +02:00
11 changed files with 19 additions and 19 deletions

View file

@@ -8,7 +8,7 @@ IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@host.docker.interna
# --- LLM backend --------------------------------------------------------- # --- LLM backend ---------------------------------------------------------
IX_OLLAMA_URL=http://host.docker.internal:11434 IX_OLLAMA_URL=http://host.docker.internal:11434
IX_DEFAULT_MODEL=gpt-oss:20b IX_DEFAULT_MODEL=qwen3:14b
# --- OCR ----------------------------------------------------------------- # --- OCR -----------------------------------------------------------------
IX_OCR_ENGINE=surya IX_OCR_ENGINE=surya

View file

@@ -25,7 +25,7 @@ Status: design phase. Full reference spec at `docs/spec-core-pipeline.md`. MVP s
- **Language**: Python 3.12, asyncio - **Language**: Python 3.12, asyncio
- **Web/REST**: FastAPI + uvicorn - **Web/REST**: FastAPI + uvicorn
- **OCR (pluggable)**: Surya OCR first (GPU, shares RTX 3090 with Ollama / Immich ML) - **OCR (pluggable)**: Surya OCR first (GPU, shares RTX 3090 with Ollama / Immich ML)
- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `gpt-oss:20b`, configurable per use case - **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `qwen3:14b`, configurable per use case
- **State**: Postgres on the shared `postgis` container (:5431), new `infoxtractor` database - **State**: Postgres on the shared `postgis` container (:5431), new `infoxtractor` database
- **Deployment**: Docker, `git push server main` → post-receive rebuild (pattern from other apps) - **Deployment**: Docker, `git push server main` → post-receive rebuild (pattern from other apps)

View file

@@ -14,7 +14,7 @@ Mac (dev)
│ curl /healthz (60 s gate) │ curl /healthz (60 s gate)
Docker container `infoxtractor` (port 8994) Docker container `infoxtractor` (port 8994)
├─ host.docker.internal:11434 → Ollama (gpt-oss:20b) ├─ host.docker.internal:11434 → Ollama (qwen3:14b)
└─ host.docker.internal:5431 → postgis (database `infoxtractor`) └─ host.docker.internal:5431 → postgis (database `infoxtractor`)
``` ```
@@ -32,7 +32,7 @@ The script:
2. Installs the `post-receive` hook (see `scripts/setup_server.sh` for the template). 2. Installs the `post-receive` hook (see `scripts/setup_server.sh` for the template).
3. Creates the `infoxtractor` Postgres role + database on the shared `postgis` container. 3. Creates the `infoxtractor` Postgres role + database on the shared `postgis` container.
4. Writes `/home/server/Public/infoxtractor/app/.env` (mode 0600) from `.env.example` with the password substituted in. 4. Writes `/home/server/Public/infoxtractor/app/.env` (mode 0600) from `.env.example` with the password substituted in.
5. Verifies `gpt-oss:20b` is pulled in Ollama. 5. Verifies `qwen3:14b` is pulled in Ollama.
6. Prints a hint to open UFW for port 8994 on the LAN subnet if it's missing. 6. Prints a hint to open UFW for port 8994 on the LAN subnet if it's missing.
After the script finishes, add the deploy remote to the local repo: After the script finishes, add the deploy remote to the local repo:
@@ -103,9 +103,9 @@ The `IX_DEFAULT_MODEL` env var on the server's `.env` must match something in `o
### If `/healthz` shows `ollama: degraded` ### If `/healthz` shows `ollama: degraded`
`gpt-oss:20b` (or the configured default) is not pulled. On the host: `qwen3:14b` (or the configured default) is not pulled. On the host:
```bash ```bash
ssh server@192.168.68.42 "docker exec ollama ollama pull gpt-oss:20b" ssh server@192.168.68.42 "docker exec ollama ollama pull qwen3:14b"
``` ```
### If `/healthz` shows `ocr: fail` ### If `/healthz` shows `ocr: fail`

View file

@@ -98,7 +98,7 @@ def post_job(base_url: str, file_url: str, client_id: str, request_id: str) -> d
paperless_text = ( paperless_text = (
"DKB\n" "DKB\n"
"DE89370400440532013000\n" "DE89370400440532013000\n"
"Statement period: 01.03.2026 – 31.03.2026\n" "Statement period: 01.03.2026 - 31.03.2026\n"
"Opening balance: 1234.56 EUR\n" "Opening balance: 1234.56 EUR\n"
"Closing balance: 1450.22 EUR\n" "Closing balance: 1450.22 EUR\n"
"31.03.2026\n" "31.03.2026\n"

View file

@@ -9,7 +9,7 @@
# 2. Writes the post-receive hook (or updates it) and makes it executable. # 2. Writes the post-receive hook (or updates it) and makes it executable.
# 3. Creates the Postgres role + database on the shared `postgis` container. # 3. Creates the Postgres role + database on the shared `postgis` container.
# 4. Writes `/home/server/Public/infoxtractor/app/.env` (0600) from .env.example. # 4. Writes `/home/server/Public/infoxtractor/app/.env` (0600) from .env.example.
# 5. Verifies `gpt-oss:20b` is pulled in Ollama. # 5. Verifies `qwen3:14b` is pulled in Ollama.
set -euo pipefail set -euo pipefail
@@ -70,9 +70,9 @@ HOOK
chmod +x "${REPOS_GIT}/hooks/post-receive" chmod +x "${REPOS_GIT}/hooks/post-receive"
EOF EOF
echo "==> 2/5 Verifying Ollama has gpt-oss:20b pulled" echo "==> 2/5 Verifying Ollama has qwen3:14b pulled"
if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'gpt-oss:20b'"; then if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'qwen3:14b'"; then
echo "FAIL: gpt-oss:20b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull gpt-oss:20b'" >&2 echo "FAIL: qwen3:14b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull qwen3:14b'" >&2
exit 1 exit 1
fi fi

View file

@@ -40,7 +40,7 @@ class AppConfig(BaseSettings):
# --- LLM backend --- # --- LLM backend ---
ollama_url: str = "http://host.docker.internal:11434" ollama_url: str = "http://host.docker.internal:11434"
default_model: str = "gpt-oss:20b" default_model: str = "qwen3:14b"
# --- OCR --- # --- OCR ---
ocr_engine: str = "surya" ocr_engine: str = "surya"

View file

@@ -26,7 +26,7 @@ class Request(BaseModel):
model_config = ConfigDict(extra="forbid") model_config = ConfigDict(extra="forbid")
use_case_name: str = "Bank Statement Header" use_case_name: str = "Bank Statement Header"
default_model: str = "gpt-oss:20b" default_model: str = "qwen3:14b"
system_prompt: str = ( system_prompt: str = (
"You extract header metadata from a single bank or credit-card statement. " "You extract header metadata from a single bank or credit-card statement. "
"Return only facts that appear in the document; leave a field null if uncertain. " "Return only facts that appear in the document; leave a field null if uncertain. "

View file

@@ -5,7 +5,7 @@ Never runs in CI (Forgejo runner has no LAN access to Ollama). Run locally::
IX_TEST_OLLAMA=1 uv run pytest tests/live/test_ollama_client_live.py -v IX_TEST_OLLAMA=1 uv run pytest tests/live/test_ollama_client_live.py -v
Assumes the Ollama server at ``http://192.168.68.42:11434`` already has Assumes the Ollama server at ``http://192.168.68.42:11434`` already has
``gpt-oss:20b`` pulled. ``qwen3:14b`` pulled.
""" """
from __future__ import annotations from __future__ import annotations
@@ -26,7 +26,7 @@ pytestmark = [
] ]
_OLLAMA_URL = "http://192.168.68.42:11434" _OLLAMA_URL = "http://192.168.68.42:11434"
_MODEL = "gpt-oss:20b" _MODEL = "qwen3:14b"
async def test_structured_output_round_trip() -> None: async def test_structured_output_round_trip() -> None:

View file

@@ -54,7 +54,7 @@ def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None:
"@host.docker.internal:5431/infoxtractor" "@host.docker.internal:5431/infoxtractor"
) )
assert cfg.ollama_url == "http://host.docker.internal:11434" assert cfg.ollama_url == "http://host.docker.internal:11434"
assert cfg.default_model == "gpt-oss:20b" assert cfg.default_model == "qwen3:14b"
assert cfg.ocr_engine == "surya" assert cfg.ocr_engine == "surya"
assert cfg.tmp_dir == "/tmp/ix" assert cfg.tmp_dir == "/tmp/ix"
assert cfg.pipeline_worker_concurrency == 1 assert cfg.pipeline_worker_concurrency == 1

View file

@@ -363,8 +363,8 @@ class TestModelSelection:
req = _make_request(include_provenance=False) req = _make_request(include_provenance=False)
resp = _response_with_segment_index(lines=["hello"]) resp = _response_with_segment_index(lines=["hello"])
await step.process(req, resp) await step.process(req, resp)
# use-case default is gpt-oss:20b # use-case default is qwen3:14b
assert client.request_kwargs["model"] == "gpt-oss:20b" # type: ignore[index] assert client.request_kwargs["model"] == "qwen3:14b" # type: ignore[index]
# ---------------------------------------------------------------------------- # ----------------------------------------------------------------------------

View file

@@ -12,7 +12,7 @@ class TestRequest:
def test_defaults(self) -> None: def test_defaults(self) -> None:
r = Request() r = Request()
assert r.use_case_name == "Bank Statement Header" assert r.use_case_name == "Bank Statement Header"
assert r.default_model == "gpt-oss:20b" assert r.default_model == "qwen3:14b"
# Stable substring for agent/worker tests that want to confirm the # Stable substring for agent/worker tests that want to confirm the
# prompt is what they think it is. # prompt is what they think it is.
assert "extract header metadata" in r.system_prompt assert "extract header metadata" in r.system_prompt