chore(model): qwen3:14b default (#30)
All checks were successful
tests / test (push) Successful in 2m55s
All checks were successful
tests / test (push) Successful in 2m55s
unblock first deploy
This commit is contained in:
commit
a9e510362d
11 changed files with 19 additions and 19 deletions
|
|
@ -8,7 +8,7 @@ IX_POSTGRES_URL=postgresql+asyncpg://infoxtractor:<password>@host.docker.interna
|
|||
|
||||
# --- LLM backend ---------------------------------------------------------
|
||||
IX_OLLAMA_URL=http://host.docker.internal:11434
|
||||
IX_DEFAULT_MODEL=gpt-oss:20b
|
||||
IX_DEFAULT_MODEL=qwen3:14b
|
||||
|
||||
# --- OCR -----------------------------------------------------------------
|
||||
IX_OCR_ENGINE=surya
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ Status: design phase. Full reference spec at `docs/spec-core-pipeline.md`. MVP s
|
|||
- **Language**: Python 3.12, asyncio
|
||||
- **Web/REST**: FastAPI + uvicorn
|
||||
- **OCR (pluggable)**: Surya OCR first (GPU, shares RTX 3090 with Ollama / Immich ML)
|
||||
- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidate: `qwen2.5:32b` / `gpt-oss:20b`, configurable per use case
|
||||
- **LLM**: Ollama at `192.168.68.42:11434`, structured outputs via JSON schema. Initial model candidates: `qwen2.5:32b` / `qwen3:14b`, configurable per use case
|
||||
- **State**: Postgres on the shared `postgis` container (:5431), new `infoxtractor` database
|
||||
- **Deployment**: Docker, `git push server main` → post-receive rebuild (pattern from other apps)
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ Mac (dev)
|
|||
│ curl /healthz (60 s gate)
|
||||
▼
|
||||
Docker container `infoxtractor` (port 8994)
|
||||
├─ host.docker.internal:11434 → Ollama (gpt-oss:20b)
|
||||
├─ host.docker.internal:11434 → Ollama (qwen3:14b)
|
||||
└─ host.docker.internal:5431 → postgis (database `infoxtractor`)
|
||||
```
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ The script:
|
|||
2. Installs the `post-receive` hook (see `scripts/setup_server.sh` for the template).
|
||||
3. Creates the `infoxtractor` Postgres role + database on the shared `postgis` container.
|
||||
4. Writes `/home/server/Public/infoxtractor/app/.env` (mode 0600) from `.env.example` with the password substituted in.
|
||||
5. Verifies `gpt-oss:20b` is pulled in Ollama.
|
||||
5. Verifies `qwen3:14b` is pulled in Ollama.
|
||||
6. Prints a hint to open UFW for port 8994 on the LAN subnet if it's missing.
|
||||
|
||||
After the script finishes, add the deploy remote to the local repo:
|
||||
|
|
@ -103,9 +103,9 @@ The `IX_DEFAULT_MODEL` env var on the server's `.env` must match something in `o
|
|||
|
||||
### If `/healthz` shows `ollama: degraded`
|
||||
|
||||
`gpt-oss:20b` (or the configured default) is not pulled. On the host:
|
||||
`qwen3:14b` (or the configured default) is not pulled. On the host:
|
||||
```bash
|
||||
ssh server@192.168.68.42 "docker exec ollama ollama pull gpt-oss:20b"
|
||||
ssh server@192.168.68.42 "docker exec ollama ollama pull qwen3:14b"
|
||||
```
|
||||
|
||||
### If `/healthz` shows `ocr: fail`
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ def post_job(base_url: str, file_url: str, client_id: str, request_id: str) -> d
|
|||
paperless_text = (
|
||||
"DKB\n"
|
||||
"DE89370400440532013000\n"
|
||||
"Statement period: 01.03.2026 – 31.03.2026\n"
|
||||
"Statement period: 01.03.2026 - 31.03.2026\n"
|
||||
"Opening balance: 1234.56 EUR\n"
|
||||
"Closing balance: 1450.22 EUR\n"
|
||||
"31.03.2026\n"
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
# 2. Writes the post-receive hook (or updates it) and makes it executable.
|
||||
# 3. Creates the Postgres role + database on the shared `postgis` container.
|
||||
# 4. Writes `/home/server/Public/infoxtractor/app/.env` (0600) from .env.example.
|
||||
# 5. Verifies `gpt-oss:20b` is pulled in Ollama.
|
||||
# 5. Verifies `qwen3:14b` is pulled in Ollama.
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
|
|
@ -70,9 +70,9 @@ HOOK
|
|||
chmod +x "${REPOS_GIT}/hooks/post-receive"
|
||||
EOF
|
||||
|
||||
echo "==> 2/5 Verifying Ollama has gpt-oss:20b pulled"
|
||||
if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'gpt-oss:20b'"; then
|
||||
echo "FAIL: gpt-oss:20b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull gpt-oss:20b'" >&2
|
||||
echo "==> 2/5 Verifying Ollama has qwen3:14b pulled"
|
||||
if ! ssh "${SERVER}" "docker exec ollama ollama list | awk '{print \$1}' | grep -qx 'qwen3:14b'"; then
|
||||
echo "FAIL: qwen3:14b not found in Ollama. Run: ssh ${SERVER} 'docker exec ollama ollama pull qwen3:14b'" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class AppConfig(BaseSettings):
|
|||
|
||||
# --- LLM backend ---
|
||||
ollama_url: str = "http://host.docker.internal:11434"
|
||||
default_model: str = "gpt-oss:20b"
|
||||
default_model: str = "qwen3:14b"
|
||||
|
||||
# --- OCR ---
|
||||
ocr_engine: str = "surya"
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ class Request(BaseModel):
|
|||
model_config = ConfigDict(extra="forbid")
|
||||
|
||||
use_case_name: str = "Bank Statement Header"
|
||||
default_model: str = "gpt-oss:20b"
|
||||
default_model: str = "qwen3:14b"
|
||||
system_prompt: str = (
|
||||
"You extract header metadata from a single bank or credit-card statement. "
|
||||
"Return only facts that appear in the document; leave a field null if uncertain. "
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ Never runs in CI (Forgejo runner has no LAN access to Ollama). Run locally::
|
|||
IX_TEST_OLLAMA=1 uv run pytest tests/live/test_ollama_client_live.py -v
|
||||
|
||||
Assumes the Ollama server at ``http://192.168.68.42:11434`` already has
|
||||
``gpt-oss:20b`` pulled.
|
||||
``qwen3:14b`` pulled.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
|
@ -26,7 +26,7 @@ pytestmark = [
|
|||
]
|
||||
|
||||
_OLLAMA_URL = "http://192.168.68.42:11434"
|
||||
_MODEL = "gpt-oss:20b"
|
||||
_MODEL = "qwen3:14b"
|
||||
|
||||
|
||||
async def test_structured_output_round_trip() -> None:
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ def test_defaults_match_spec(monkeypatch: pytest.MonkeyPatch) -> None:
|
|||
"@host.docker.internal:5431/infoxtractor"
|
||||
)
|
||||
assert cfg.ollama_url == "http://host.docker.internal:11434"
|
||||
assert cfg.default_model == "gpt-oss:20b"
|
||||
assert cfg.default_model == "qwen3:14b"
|
||||
assert cfg.ocr_engine == "surya"
|
||||
assert cfg.tmp_dir == "/tmp/ix"
|
||||
assert cfg.pipeline_worker_concurrency == 1
|
||||
|
|
|
|||
|
|
@ -363,8 +363,8 @@ class TestModelSelection:
|
|||
req = _make_request(include_provenance=False)
|
||||
resp = _response_with_segment_index(lines=["hello"])
|
||||
await step.process(req, resp)
|
||||
# use-case default is gpt-oss:20b
|
||||
assert client.request_kwargs["model"] == "gpt-oss:20b" # type: ignore[index]
|
||||
# use-case default is qwen3:14b
|
||||
assert client.request_kwargs["model"] == "qwen3:14b" # type: ignore[index]
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ class TestRequest:
|
|||
def test_defaults(self) -> None:
|
||||
r = Request()
|
||||
assert r.use_case_name == "Bank Statement Header"
|
||||
assert r.default_model == "gpt-oss:20b"
|
||||
assert r.default_model == "qwen3:14b"
|
||||
# Stable substring for agent/worker tests that want to confirm the
|
||||
# prompt is what they think it is.
|
||||
assert "extract header metadata" in r.system_prompt
|
||||
|
|
|
|||
Loading…
Reference in a new issue