Compare commits

..

No commits in common. "95a576f744b12e8e5132325ff83bfe2e20155c8d" and "763407ba1c02ccec337533214bcd4c08137c17aa" have entirely different histories.

2 changed files with 6 additions and 50 deletions

View file

@ -96,9 +96,8 @@ class OllamaClient:
) from exc
content = (payload.get("message") or {}).get("content") or ""
json_blob = _extract_json_blob(content)
try:
parsed = response_schema.model_validate_json(json_blob)
parsed = response_schema.model_validate_json(content)
except ValidationError as exc:
raise IXException(
IXErrorCode.IX_002_001,
@ -187,12 +186,7 @@ class OllamaClient:
"model": request_kwargs.get("model"),
"messages": messages,
"stream": False,
# NOTE: format is deliberately omitted. `format="json"` made
# reasoning models (qwen3) abort after emitting `{}` because the
# constrained sampler terminated before the chain-of-thought
# finished; `format=<schema>` segfaulted Ollama 0.11.8. Letting
# the model stream freely and then extracting the trailing JSON
# blob works for both reasoning and non-reasoning models.
"format": "json",
}
options: dict[str, Any] = {}
@ -224,43 +218,6 @@ class OllamaClient:
return out
def _balanced_object_end(text: str, start: int) -> int:
    """Return the index just past the ``}`` that closes the ``{`` at ``start``.

    Braces inside double-quoted strings are ignored, and a backslash
    escapes the following character so ``\\"`` does not end a string.
    Returns ``-1`` when the object opened at ``start`` never closes.
    """
    depth = 0
    in_string = False
    escaped = False
    for i in range(start, len(text)):
        ch = text[i]
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return i + 1
    return -1


def _extract_json_blob(text: str) -> str:
    """Return the last balanced top-level JSON object found in ``text``.

    Model output may carry reasoning (``<think></think>`` blocks), a prose
    prefix, or a fenced ```json code block around the real answer. The
    answer is expected to be the trailing object, so every balanced
    top-level ``{...}`` is located and the final one is returned. An
    opening brace that never closes (e.g. a stray ``{`` inside reasoning
    text) is skipped and the scan resumes at the next ``{``.

    Args:
        text: Raw message content returned by the model.

    Returns:
        The last balanced ``{...}`` substring. If ``text`` has no ``{`` at
        all it is returned unchanged; if no brace ever balances, the text
        from the first ``{`` onward is returned. In both fallback cases the
        result is not valid JSON for an object, so the caller's JSON
        validation is what reports the failure.

    Note:
        For a response truncated mid-object, the last *complete* nested
        object is returned rather than the truncated outer one; callers
        relying on schema validation should still reject it unless the
        fragment happens to satisfy the schema.
    """
    first_open = text.find("{")
    if first_open < 0:
        return text

    blob: str | None = None
    pos = first_open
    while pos >= 0:
        end = _balanced_object_end(text, pos)
        if end < 0:
            # This brace never closes; it may be prose. Retry from the
            # next opening brace with fresh string/depth state.
            pos = text.find("{", pos + 1)
        else:
            # Remember this object and keep looking for a later one.
            blob = text[pos:end]
            pos = text.find("{", end)

    if blob is None:
        return text[first_open:]
    return blob
def _inject_schema_system_message(
messages: list[dict[str, Any]],
response_schema: type[BaseModel],

View file

@ -79,11 +79,10 @@ class TestInvokeHappyPath:
body_json = json.loads(body)
assert body_json["model"] == "gpt-oss:20b"
assert body_json["stream"] is False
# No `format` is sent: Ollama 0.11.8 segfaults on full schemas and
# aborts to `{}` with `format=json` on reasoning models. Schema is
# injected into the system prompt instead; we extract the trailing
# JSON blob from the response and validate via Pydantic.
assert "format" not in body_json
# format is "json" (loose mode): Ollama 0.11.8 segfaults on full
# Pydantic schemas. We pass the schema via the system prompt
# upstream and validate on parse.
assert body_json["format"] == "json"
assert body_json["options"]["temperature"] == 0.2
assert "reasoning_effort" not in body_json
# A schema-guidance system message is prepended to the caller's