From 81e3b9a7d04808ff102d79b37a6e799db310ab4e Mon Sep 17 00:00:00 2001 From: Dirk Riemann Date: Sat, 18 Apr 2026 14:05:28 +0200 Subject: [PATCH] fix(genai): drop Ollama format flag; extract trailing JSON from response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit qwen3:14b (and deepseek-r1, other reasoning models) wrap their output in chains-of-thought before emitting real output. With format=json the constrained sampler terminated immediately at `{}` because the thinking block wasn't valid JSON; without format the model thinks normally and appends the actual JSON at the end. OllamaClient now omits the format flag and extracts the outermost balanced `{…}` block from the response (brace depth counter, string-literal aware). Works for reasoning models, ```json``` code-fenced outputs, and plain JSON alike. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ix/genai/ollama_client.py | 47 ++++++++++++++++++++++++++++++-- tests/unit/test_ollama_client.py | 9 +++--- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/src/ix/genai/ollama_client.py b/src/ix/genai/ollama_client.py index 0cc3783..d060b6e 100644 --- a/src/ix/genai/ollama_client.py +++ b/src/ix/genai/ollama_client.py @@ -96,8 +96,9 @@ class OllamaClient: ) from exc content = (payload.get("message") or {}).get("content") or "" + json_blob = _extract_json_blob(content) try: - parsed = response_schema.model_validate_json(content) + parsed = response_schema.model_validate_json(json_blob) except ValidationError as exc: raise IXException( IXErrorCode.IX_002_001, @@ -186,7 +187,12 @@ class OllamaClient: "model": request_kwargs.get("model"), "messages": messages, "stream": False, - "format": "json", + # NOTE: format is deliberately omitted. `format="json"` made + # reasoning models (qwen3) abort after emitting `{}` because the + # constrained sampler terminated before the chain-of-thought + # finished; `format=<schema>` segfaulted Ollama 0.11.8.
Letting + # the model stream freely and then extracting the trailing JSON + # blob works for both reasoning and non-reasoning models. } options: dict[str, Any] = {} @@ -218,6 +224,43 @@ class OllamaClient: return out +def _extract_json_blob(text: str) -> str: + """Return the outermost balanced JSON object in ``text``. + + Reasoning models (qwen3, deepseek-r1) precede their real answer with + ``<think>`` blocks. Other models sometimes prefix prose or + fence the JSON in ```json``` code blocks. Finding the first balanced + ``{…}`` is the cheapest robust parse that works for all three shapes; + a malformed response yields the full text and Pydantic catches it + downstream as ``IX_002_001``. + """ + start = text.find("{") + if start < 0: + return text + depth = 0 + in_string = False + escaped = False + for i in range(start, len(text)): + ch = text[i] + if in_string: + if escaped: + escaped = False + elif ch == "\\": + escaped = True + elif ch == '"': + in_string = False + continue + if ch == '"': + in_string = True + elif ch == "{": + depth += 1 + elif ch == "}": + depth -= 1 + if depth == 0: + return text[start : i + 1] + return text[start:] + + def _inject_schema_system_message( messages: list[dict[str, Any]], response_schema: type[BaseModel], diff --git a/tests/unit/test_ollama_client.py b/tests/unit/test_ollama_client.py index 57f9f01..d6ee41f 100644 --- a/tests/unit/test_ollama_client.py +++ b/tests/unit/test_ollama_client.py @@ -79,10 +79,11 @@ class TestInvokeHappyPath: body_json = json.loads(body) assert body_json["model"] == "gpt-oss:20b" assert body_json["stream"] is False - # format is "json" (loose mode): Ollama 0.11.8 segfaults on full - # Pydantic schemas. We pass the schema via the system prompt - # upstream and validate on parse. - assert body_json["format"] == "json" + # No `format` is sent: Ollama 0.11.8 segfaults on full schemas and + # aborts to `{}` with `format=json` on reasoning models.
Schema is + # injected into the system prompt instead; we extract the trailing + # JSON blob from the response and validate via Pydantic. + assert "format" not in body_json assert body_json["options"]["temperature"] == 0.2 assert "reasoning_effort" not in body_json # A schema-guidance system message is prepended to the caller's