# qwen3:14b (and deepseek-r1, other reasoning models) wrap their output in
# <think>…</think> chains-of-thought before emitting real output. With
# format=json the constrained sampler terminated immediately at `{}` because
# the thinking block wasn't valid JSON; without format the model thinks
# normally and appends the actual JSON at the end. OllamaClient now omits the
# format flag and extracts the outermost balanced `{…}` block from the
# response (brace-depth counter, string-literal aware). Works for reasoning
# models, ```json```-code-fenced outputs, and plain JSON alike.
# Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
"""Tests for :class:`OllamaClient` — hermetic, pytest-httpx-driven.
|
|
|
|
Covers spec §6 GenAIStep Ollama call contract:
|
|
|
|
* POST body shape (model / messages / format / stream / options).
|
|
* Response parsing → :class:`GenAIInvocationResult`.
|
|
* Error mapping: connection / timeout / 5xx → ``IX_002_000``;
|
|
schema-violating body → ``IX_002_001``.
|
|
* ``selfcheck()``: tags-reachable + model-listed → ``ok``;
|
|
reachable-but-missing → ``degraded``; unreachable → ``fail``.
|
|
"""
|
|
|
|
from __future__ import annotations

import json

import httpx
import pytest
from pydantic import BaseModel
from pytest_httpx import HTTPXMock

from ix.errors import IXErrorCode, IXException
from ix.genai.ollama_client import OllamaClient


|
class _Schema(BaseModel):
    """Trivial structured-output schema for the round-trip tests."""

    # Required field: omitting it from a response exercises the
    # schema-violation (IX_002_001) error path.
    bank_name: str
    # Optional field with a None default: exercises optional handling.
    account_number: str | None = None


def _ollama_chat_ok_body(content_json: str) -> dict:
|
|
"""Build a minimal Ollama /api/chat success body."""
|
|
return {
|
|
"model": "gpt-oss:20b",
|
|
"message": {"role": "assistant", "content": content_json},
|
|
"done": True,
|
|
"eval_count": 42,
|
|
"prompt_eval_count": 17,
|
|
}
|
|
|
|
|
|
class TestInvokeHappyPath:
    """Successful ``invoke()`` round-trips: request shape and response parsing."""

    async def test_posts_to_chat_endpoint_with_format_and_no_stream(
        self, httpx_mock: HTTPXMock
    ) -> None:
        # NOTE: historical name — the client deliberately sends NO `format`
        # field; its absence is asserted below.
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            json=_ollama_chat_ok_body('{"bank_name":"DKB","account_number":"DE89"}'),
        )

        client = OllamaClient(
            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
        )
        result = await client.invoke(
            request_kwargs={
                "model": "gpt-oss:20b",
                "messages": [
                    {"role": "system", "content": "You extract."},
                    {"role": "user", "content": "Doc body"},
                ],
                "temperature": 0.2,
                "reasoning_effort": "high",  # dropped silently
            },
            response_schema=_Schema,
        )

        assert result.parsed == _Schema(bank_name="DKB", account_number="DE89")
        assert result.model_name == "gpt-oss:20b"
        assert result.usage.prompt_tokens == 17
        assert result.usage.completion_tokens == 42

        # Verify request shape.
        requests = httpx_mock.get_requests()
        assert len(requests) == 1
        body_json = json.loads(requests[0].read().decode())
        assert body_json["model"] == "gpt-oss:20b"
        assert body_json["stream"] is False
        # No `format` is sent: Ollama 0.11.8 segfaults on full schemas and
        # aborts to `{}` with `format=json` on reasoning models. Schema is
        # injected into the system prompt instead; we extract the trailing
        # JSON blob from the response and validate via Pydantic.
        assert "format" not in body_json
        assert body_json["options"]["temperature"] == 0.2
        assert "reasoning_effort" not in body_json
        # A schema-guidance system message is prepended to the caller's
        # messages so the model emits the expected JSON shape.
        msgs = body_json["messages"]
        assert msgs[0]["role"] == "system"
        assert "JSON Schema" in msgs[0]["content"]
        assert msgs[1:] == [
            {"role": "system", "content": "You extract."},
            {"role": "user", "content": "Doc body"},
        ]

    async def test_text_parts_content_list_is_joined(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            json=_ollama_chat_ok_body('{"bank_name":"X"}'),
        )
        client = OllamaClient(
            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
        )
        await client.invoke(
            request_kwargs={
                "model": "gpt-oss:20b",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "part-a"},
                            {"type": "text", "text": "part-b"},
                        ],
                    }
                ],
            },
            response_schema=_Schema,
        )
        request_body = json.loads(httpx_mock.get_requests()[0].read())
        # First message is the auto-injected schema guidance; after that
        # the caller's user message has its text parts joined.
        assert request_body["messages"][0]["role"] == "system"
        assert request_body["messages"][1:] == [
            {"role": "user", "content": "part-a\npart-b"}
        ]


class TestInvokeErrorPaths:
    """Transport and schema failures map onto the documented IX error codes."""

    @staticmethod
    def _client(timeout: float) -> OllamaClient:
        # Shared construction helper; every test targets the same fake host.
        return OllamaClient(
            base_url="http://ollama.test:11434", per_call_timeout_s=timeout
        )

    @staticmethod
    async def _invoke(client: OllamaClient) -> None:
        # Minimal single-message invocation used by every error-path test.
        await client.invoke(
            request_kwargs={
                "model": "gpt-oss:20b",
                "messages": [{"role": "user", "content": "hi"}],
            },
            response_schema=_Schema,
        )

    async def test_connection_error_maps_to_002_000(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_exception(httpx.ConnectError("refused"))
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(1.0))
        assert ei.value.code is IXErrorCode.IX_002_000

    async def test_read_timeout_maps_to_002_000(self, httpx_mock: HTTPXMock) -> None:
        httpx_mock.add_exception(httpx.ReadTimeout("slow"))
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(0.5))
        assert ei.value.code is IXErrorCode.IX_002_000

    async def test_500_maps_to_002_000_with_body_snippet(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            status_code=500,
            text="boom boom server broken",
        )
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(5.0))
        assert ei.value.code is IXErrorCode.IX_002_000
        # The error detail carries a snippet of the server body for triage.
        assert "boom" in (ei.value.detail or "")

    async def test_200_with_invalid_json_maps_to_002_001(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            json=_ollama_chat_ok_body("not-json"),
        )
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(5.0))
        assert ei.value.code is IXErrorCode.IX_002_001

    async def test_200_with_schema_violation_maps_to_002_001(
        self, httpx_mock: HTTPXMock
    ) -> None:
        # Missing required `bank_name` field.
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            json=_ollama_chat_ok_body('{"account_number":"DE89"}'),
        )
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(5.0))
        assert ei.value.code is IXErrorCode.IX_002_001


class TestSelfcheck:
    """``selfcheck()`` health triage: ``ok`` / ``degraded`` / ``fail``."""

    _BASE = "http://ollama.test:11434"

    def _client(self) -> OllamaClient:
        # All selfcheck tests share one client configuration.
        return OllamaClient(base_url=self._BASE, per_call_timeout_s=5.0)

    async def test_selfcheck_ok_when_model_listed(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_response(
            url=f"{self._BASE}/api/tags",
            method="GET",
            json={"models": [{"name": "gpt-oss:20b"}, {"name": "qwen2.5:32b"}]},
        )
        status = await self._client().selfcheck(expected_model="gpt-oss:20b")
        assert status == "ok"

    async def test_selfcheck_degraded_when_model_missing(
        self, httpx_mock: HTTPXMock
    ) -> None:
        # Server reachable, but the expected model is not in the tag list.
        httpx_mock.add_response(
            url=f"{self._BASE}/api/tags",
            method="GET",
            json={"models": [{"name": "qwen2.5:32b"}]},
        )
        status = await self._client().selfcheck(expected_model="gpt-oss:20b")
        assert status == "degraded"

    async def test_selfcheck_fail_on_connection_error(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_exception(httpx.ConnectError("refused"))
        status = await self._client().selfcheck(expected_model="gpt-oss:20b")
        assert status == "fail"