fix(genai): schema in prompt (#40)

fix(genai): inject JSON schema into Ollama system prompt
format=json loose mode guarantees syntactically valid JSON but does not constrain its shape — models default to emitting {} when the system prompt doesn't list the expected fields. Prepend a schema-guidance system message containing the full Pydantic schema (after it has passed through the existing null-branch sanitiser) so the model sees exactly what shape to produce. Pydantic still validates on parse. Unit tests updated to check that the schema message is prepended without disturbing the caller's own messages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 12:02:38 +00:00 · 2026-04-18 14:02:25 +02:00
2 changed files with 52 additions and 12 deletions
--- a/src/ix/genai/ollama_client.py
+++ b/src/ix/genai/ollama_client.py
@@ -162,22 +162,26 @@ class OllamaClient:
        """Map provider-neutral kwargs to Ollama's /api/chat body.

        Schema strategy for Ollama 0.11.8: we pass ``format="json"`` (loose
-        JSON mode) rather than the full Pydantic schema. The llama.cpp
-        structured-output implementation in 0.11.8 segfaults on schemas
-        involving ``anyOf``, ``$ref``, or ``pattern`` — which Pydantic v2
-        emits for Optional / nested-model / Decimal fields.
+        JSON mode) and bake the Pydantic schema into a system message
+        ahead of the caller's own system prompt. Rationale:

-        In loose JSON mode Ollama guarantees only syntactically-valid
-        JSON; we enforce the schema on our side by catching the Pydantic
-        ``ValidationError`` at parse time and raising IX_002_001. The
-        system prompt (built upstream in GenAIStep) already tells the
-        model what JSON shape to emit, so loose mode is the right
-        abstraction layer here.
+        * The full Pydantic schema as ``format=<schema>`` crashes llama.cpp's
+          structured-output implementation (SIGSEGV) on every non-trivial
+          shape — ``anyOf`` / ``$ref`` / ``pattern`` all trigger it.
+        * ``format="json"`` alone guarantees valid JSON but not the shape;
+          models routinely return ``{}`` when not told what fields to emit.
+        * Injecting the schema into the prompt is the cheapest way to
+          get both: the model sees the expected shape explicitly, Pydantic
+          validates the response at parse time (IX_002_001 on mismatch).
+
+        Non-Ollama ``GenAIClient`` impls can ignore this behaviour and use
+        native structured-output (``response_format`` on OpenAI, etc.).
        """

        messages = self._translate_messages(
            list(request_kwargs.get("messages") or [])
        )
+        messages = _inject_schema_system_message(messages, response_schema)
        body: dict[str, Any] = {
            "model": request_kwargs.get("model"),
            "messages": messages,
@@ -214,6 +218,34 @@ class OllamaClient:
        return out


+def _inject_schema_system_message(
+    messages: list[dict[str, Any]],
+    response_schema: type[BaseModel],
+) -> list[dict[str, Any]]:
+    """Prepend a system message that pins the expected JSON shape.
+
+    Ollama's ``format="json"`` mode guarantees valid JSON but not the
+    field set or names. We emit the Pydantic schema as JSON and
+    instruct the model to match it. If the caller already provides a
+    system message, we prepend ours; otherwise ours becomes the first
+    system turn.
+    """
+    import json as _json
+
+    schema_json = _json.dumps(
+        _sanitise_schema_for_ollama(response_schema.model_json_schema()),
+        indent=2,
+    )
+    guidance = (
+        "Respond ONLY with a single JSON object matching this JSON Schema "
+        "exactly. No prose, no code fences, no explanations. All top-level "
+        "properties listed in `required` MUST be present. Use null for "
+        "fields you cannot confidently extract. The JSON Schema:\n"
+        f"{schema_json}"
+    )
+    return [{"role": "system", "content": guidance}, *messages]
+
+
 def _sanitise_schema_for_ollama(schema: Any) -> Any:
    """Strip null branches from ``anyOf`` unions.

--- a/tests/unit/test_ollama_client.py
+++ b/tests/unit/test_ollama_client.py
@@ -85,7 +85,12 @@ class TestInvokeHappyPath:
        assert body_json["format"] == "json"
        assert body_json["options"]["temperature"] == 0.2
        assert "reasoning_effort" not in body_json
-        assert body_json["messages"] == [
+        # A schema-guidance system message is prepended to the caller's
+        # messages so Ollama (format=json loose mode) emits the right shape.
+        msgs = body_json["messages"]
+        assert msgs[0]["role"] == "system"
+        assert "JSON Schema" in msgs[0]["content"]
+        assert msgs[1:] == [
            {"role": "system", "content": "You extract."},
            {"role": "user", "content": "Doc body"},
        ]
@@ -119,7 +124,10 @@ class TestInvokeHappyPath:
        import json

        request_body = json.loads(httpx_mock.get_requests()[0].read())
-        assert request_body["messages"] == [
+        # First message is the auto-injected schema guidance; after that
+        # the caller's user message has its text parts joined.
+        assert request_body["messages"][0]["role"] == "system"
+        assert request_body["messages"][1:] == [
            {"role": "user", "content": "part-a\npart-b"}
        ]