From 34f8268cd53ec4cf8fa8c6e94142c7001f064b9c Mon Sep 17 00:00:00 2001
From: Dirk Riemann <ditori@gmail.com>
Date: Sat, 18 Apr 2026 14:02:25 +0200
Subject: [PATCH] fix(genai): inject JSON schema into Ollama system prompt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ollama's loose JSON mode (format=json) guarantees syntactically valid
JSON but not its shape — models default to emitting {} when the system
prompt doesn't list the fields. Prepend a schema-guidance system
message containing the full Pydantic schema (passed through the
existing null-branch sanitiser first) so the model sees exactly what
shape to produce. Pydantic still validates the response on parse.

Unit tests updated to check the schema message is prepended without
disturbing the caller's own messages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/ix/genai/ollama_client.py    | 52 ++++++++++++++++++++++++++------
 tests/unit/test_ollama_client.py | 12 ++++++--
 2 files changed, 52 insertions(+), 12 deletions(-)
diff --git a/src/ix/genai/ollama_client.py b/src/ix/genai/ollama_client.py
index 85578bc..0cc3783 100644
--- a/src/ix/genai/ollama_client.py
+++ b/src/ix/genai/ollama_client.py
@@ -162,22 +162,26 @@ class OllamaClient:
         """Map provider-neutral kwargs to Ollama's /api/chat body.
 
         Schema strategy for Ollama 0.11.8: we pass ``format="json"`` (loose
-        JSON mode) rather than the full Pydantic schema. The llama.cpp
-        structured-output implementation in 0.11.8 segfaults on schemas
-        involving ``anyOf``, ``$ref``, or ``pattern`` — which Pydantic v2
-        emits for Optional / nested-model / Decimal fields.
+        JSON mode) and bake the Pydantic schema into a system message
+        ahead of the caller's own system prompt. Rationale:
 
-        In loose JSON mode Ollama guarantees only syntactically-valid
-        JSON; we enforce the schema on our side by catching the Pydantic
-        ``ValidationError`` at parse time and raising IX_002_001. The
-        system prompt (built upstream in GenAIStep) already tells the
-        model what JSON shape to emit, so loose mode is the right
-        abstraction layer here.
+        * The full Pydantic schema as ``format=<schema>`` crashes llama.cpp's
+          structured-output implementation (SIGSEGV) on every non-trivial
+          shape — ``anyOf`` / ``$ref`` / ``pattern`` all trigger it.
+        * ``format="json"`` alone guarantees valid JSON but not the shape;
+          models routinely return ``{}`` when not told what fields to emit.
+        * Injecting the schema into the prompt is the cheapest way to
+          get both: the model sees the expected shape explicitly, Pydantic
+          validates the response at parse time (IX_002_001 on mismatch).
+
+        Non-Ollama ``GenAIClient`` impls can ignore this behaviour and use
+        native structured-output (``response_format`` on OpenAI, etc.).
         """
 
         messages = self._translate_messages(
             list(request_kwargs.get("messages") or [])
         )
+        messages = _inject_schema_system_message(messages, response_schema)
         body: dict[str, Any] = {
             "model": request_kwargs.get("model"),
             "messages": messages,
@@ -214,6 +218,34 @@ class OllamaClient:
         return out
 
 
+def _inject_schema_system_message(
+    messages: list[dict[str, Any]],
+    response_schema: type[BaseModel],
+) -> list[dict[str, Any]]:
+    """Prepend a system message that pins the expected JSON shape.
+
+    Ollama's ``format="json"`` mode guarantees valid JSON but not the
+    field set or names. We emit the Pydantic schema as JSON and
+    instruct the model to match it. The guidance is always inserted at
+    index 0, ahead of any system message the caller supplied, and a
+    new list is returned; ``messages`` itself is not mutated.
+    """
+    import json as _json
+
+    schema_json = _json.dumps(
+        _sanitise_schema_for_ollama(response_schema.model_json_schema()),
+        indent=2,
+    )
+    guidance = (
+        "Respond ONLY with a single JSON object matching this JSON Schema "
+        "exactly. No prose, no code fences, no explanations. All top-level "
+        "properties listed in `required` MUST be present. Use null for "
+        "fields you cannot confidently extract. The JSON Schema:\n"
+        f"{schema_json}"
+    )
+    return [{"role": "system", "content": guidance}, *messages]
+
+
 def _sanitise_schema_for_ollama(schema: Any) -> Any:
     """Strip null branches from ``anyOf`` unions.
 
diff --git a/tests/unit/test_ollama_client.py b/tests/unit/test_ollama_client.py
index 6a5f203..57f9f01 100644
--- a/tests/unit/test_ollama_client.py
+++ b/tests/unit/test_ollama_client.py
@@ -85,7 +85,12 @@ class TestInvokeHappyPath:
         assert body_json["format"] == "json"
         assert body_json["options"]["temperature"] == 0.2
         assert "reasoning_effort" not in body_json
-        assert body_json["messages"] == [
+        # A schema-guidance system message is prepended to the caller's
+        # messages so Ollama (format=json loose mode) emits the right shape.
+        msgs = body_json["messages"]
+        assert msgs[0]["role"] == "system"
+        assert "JSON Schema" in msgs[0]["content"]
+        assert msgs[1:] == [
             {"role": "system", "content": "You extract."},
             {"role": "user", "content": "Doc body"},
         ]
@@ -119,7 +124,10 @@ class TestInvokeHappyPath:
         import json
 
         request_body = json.loads(httpx_mock.get_requests()[0].read())
-        assert request_body["messages"] == [
+        # First message is the auto-injected schema guidance; after that
+        # the caller's user message has its text parts joined.
+        assert request_body["messages"][0]["role"] == "system"
+        assert request_body["messages"][1:] == [
             {"role": "user", "content": "part-a\npart-b"}
         ]