2 changed files with 6 additions and 50 deletions
--- a/src/ix/genai/ollama_client.py
+++ b/src/ix/genai/ollama_client.py
@@ -96,9 +96,8 @@ class OllamaClient:
            ) from exc

        content = (payload.get("message") or {}).get("content") or ""
-        json_blob = _extract_json_blob(content)
        try:
-            parsed = response_schema.model_validate_json(json_blob)
+            parsed = response_schema.model_validate_json(content)
        except ValidationError as exc:
            raise IXException(
                IXErrorCode.IX_002_001,
@@ -187,12 +186,7 @@ class OllamaClient:
            "model": request_kwargs.get("model"),
            "messages": messages,
            "stream": False,
-            # NOTE: format is deliberately omitted. `format="json"` made
-            # reasoning models (qwen3) abort after emitting `{}` because the
-            # constrained sampler terminated before the chain-of-thought
-            # finished; `format=<schema>` segfaulted Ollama 0.11.8. Letting
-            # the model stream freely and then extracting the trailing JSON
-            # blob works for both reasoning and non-reasoning models.
+            "format": "json",
        }

        options: dict[str, Any] = {}
@@ -224,43 +218,6 @@ class OllamaClient:
        return out


-def _extract_json_blob(text: str) -> str:
-    """Return the outermost balanced JSON object in ``text``.
-
-    Reasoning models (qwen3, deepseek-r1) wrap their real answer in
-    ``<think>…</think>`` blocks. Other models sometimes prefix prose or
-    fence the JSON in ```json``` code blocks. Finding the last balanced
-    ``{…}`` is the cheapest robust parse that works for all three shapes;
-    a malformed response yields the full text and Pydantic catches it
-    downstream as ``IX_002_001``.
-    """
-    start = text.find("{")
-    if start < 0:
-        return text
-    depth = 0
-    in_string = False
-    escaped = False
-    for i in range(start, len(text)):
-        ch = text[i]
-        if in_string:
-            if escaped:
-                escaped = False
-            elif ch == "\\":
-                escaped = True
-            elif ch == '"':
-                in_string = False
-            continue
-        if ch == '"':
-            in_string = True
-        elif ch == "{":
-            depth += 1
-        elif ch == "}":
-            depth -= 1
-            if depth == 0:
-                return text[start : i + 1]
-    return text[start:]
-
-
 def _inject_schema_system_message(
    messages: list[dict[str, Any]],
    response_schema: type[BaseModel],
--- a/tests/unit/test_ollama_client.py
+++ b/tests/unit/test_ollama_client.py
@@ -79,11 +79,10 @@ class TestInvokeHappyPath:
        body_json = json.loads(body)
        assert body_json["model"] == "gpt-oss:20b"
        assert body_json["stream"] is False
-        # No `format` is sent: Ollama 0.11.8 segfaults on full schemas and
-        # aborts to `{}` with `format=json` on reasoning models. Schema is
-        # injected into the system prompt instead; we extract the trailing
-        # JSON blob from the response and validate via Pydantic.
-        assert "format" not in body_json
+        # format is "json" (loose mode): Ollama 0.11.8 segfaults on full
+        # Pydantic schemas. We pass the schema via the system prompt
+        # upstream and validate on parse.
+        assert body_json["format"] == "json"
        assert body_json["options"]["temperature"] == 0.2
        assert "reasoning_effort" not in body_json
        # A schema-guidance system message is prepended to the caller's