fix(genai): extract trailing JSON (#41)

2026-04-18 12:05:46 +00:00 · 2026-04-18 12:05:46 +00:00 · 95a576f744
commit 95a576f744
parent 763407ba1c 81e3b9a7d0
2 changed files with 50 additions and 6 deletions
--- a/src/ix/genai/ollama_client.py
+++ b/src/ix/genai/ollama_client.py
@ -96,8 +96,9 @@ class OllamaClient:
            ) from exc
        content = (payload.get("message") or {}).get("content") or ""
        json_blob = _extract_json_blob(content)
        try:
-            parsed = response_schema.model_validate_json(content)
+            parsed = response_schema.model_validate_json(json_blob)
        except ValidationError as exc:
            raise IXException(
                IXErrorCode.IX_002_001,
@ -186,7 +187,12 @@ class OllamaClient:
            "model": request_kwargs.get("model"),
            "messages": messages,
            "stream": False,
-            "format": "json",
+            # NOTE: format is deliberately omitted. `format="json"` made
            # reasoning models (qwen3) abort after emitting `{}` because the
            # constrained sampler terminated before the chain-of-thought
            # finished; `format=<schema>` segfaulted Ollama 0.11.8. Letting
            # the model generate freely and then extracting the trailing JSON
            # blob works for both reasoning and non-reasoning models.
        }
        options: dict[str, Any] = {}
@ -218,6 +224,43 @@ class OllamaClient:
        return out
 def _extract_json_blob(text: str) -> str:
    """Return the outermost balanced JSON object in ``text``.
    Reasoning models (qwen3, deepseek-r1) wrap their real answer in
    ``<think>…</think>`` blocks. Other models sometimes prefix prose or
    fence the JSON in ```json``` code blocks. Finding the last balanced
    ``{…}`` is the cheapest robust parse that works for all three shapes;
    a malformed response yields the full text and Pydantic catches it
    downstream as ``IX_002_001``.
    """
    start = text.find("{")
    if start < 0:
        return text
    depth = 0
    in_string = False
    escaped = False
    for i in range(start, len(text)):
        ch = text[i]
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:
                return text[start : i + 1]
    return text[start:]
 def _inject_schema_system_message(
    messages: list[dict[str, Any]],
    response_schema: type[BaseModel],
--- a/tests/unit/test_ollama_client.py
+++ b/tests/unit/test_ollama_client.py
@ -79,10 +79,11 @@ class TestInvokeHappyPath:
        body_json = json.loads(body)
        assert body_json["model"] == "gpt-oss:20b"
        assert body_json["stream"] is False
-        # format is "json" (loose mode): Ollama 0.11.8 segfaults on full
+        # No `format` is sent: Ollama 0.11.8 segfaults on full schemas and
-        # Pydantic schemas. We pass the schema via the system prompt
+        # aborts to `{}` with `format=json` on reasoning models. Schema is
-        # upstream and validate on parse.
+        # injected into the system prompt instead; we extract the trailing
-        assert body_json["format"] == "json"
+        # JSON blob from the response and validate via Pydantic.
        assert "format" not in body_json
        assert body_json["options"]["temperature"] == 0.2
        assert "reasoning_effort" not in body_json
        # A schema-guidance system message is prepended to the caller's