fix(genai): inject JSON schema into Ollama system prompt
All checks were successful
tests / test (push) Successful in 1m8s
tests / test (pull_request) Successful in 1m18s

format=json loose mode gives valid JSON but no shape — models default
to emitting {} when the system prompt doesn't list fields. Prepend a
schema-guidance system message with the full Pydantic schema (after
the existing null-branch sanitiser) so the model sees exactly what
shape to produce. Pydantic still validates on parse.

Unit tests updated to check the schema message is prepended without
disturbing the caller's own messages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dirk Riemann 2026-04-18 14:02:25 +02:00
parent 9c73895318
commit 34f8268cd5
2 changed files with 52 additions and 12 deletions

View file

@ -162,22 +162,26 @@ class OllamaClient:
"""Map provider-neutral kwargs to Ollama's /api/chat body. """Map provider-neutral kwargs to Ollama's /api/chat body.
Schema strategy for Ollama 0.11.8: we pass ``format="json"`` (loose Schema strategy for Ollama 0.11.8: we pass ``format="json"`` (loose
JSON mode) rather than the full Pydantic schema. The llama.cpp JSON mode) and bake the Pydantic schema into a system message
structured-output implementation in 0.11.8 segfaults on schemas ahead of the caller's own system prompt. Rationale:
involving ``anyOf``, ``$ref``, or ``pattern`` which Pydantic v2
emits for Optional / nested-model / Decimal fields.
In loose JSON mode Ollama guarantees only syntactically-valid * The full Pydantic schema as ``format=<schema>`` crashes llama.cpp's
JSON; we enforce the schema on our side by catching the Pydantic structured-output implementation (SIGSEGV) on every non-trivial
``ValidationError`` at parse time and raising IX_002_001. The shape ``anyOf`` / ``$ref`` / ``pattern`` all trigger it.
system prompt (built upstream in GenAIStep) already tells the * ``format="json"`` alone guarantees valid JSON but not the shape;
model what JSON shape to emit, so loose mode is the right models routinely return ``{}`` when not told what fields to emit.
abstraction layer here. * Injecting the schema into the prompt is the cheapest way to
get both: the model sees the expected shape explicitly, Pydantic
validates the response at parse time (IX_002_001 on mismatch).
Non-Ollama ``GenAIClient`` impls can ignore this behaviour and use
native structured-output (``response_format`` on OpenAI, etc.).
""" """
messages = self._translate_messages( messages = self._translate_messages(
list(request_kwargs.get("messages") or []) list(request_kwargs.get("messages") or [])
) )
messages = _inject_schema_system_message(messages, response_schema)
body: dict[str, Any] = { body: dict[str, Any] = {
"model": request_kwargs.get("model"), "model": request_kwargs.get("model"),
"messages": messages, "messages": messages,
@ -214,6 +218,34 @@ class OllamaClient:
return out return out
def _inject_schema_system_message(
    messages: list[dict[str, Any]],
    response_schema: type[BaseModel],
) -> list[dict[str, Any]]:
    """Return *messages* with a schema-pinning system turn prepended.

    Ollama's ``format="json"`` mode only guarantees syntactically valid
    JSON — it does not constrain field names or the field set.  To pin the
    shape, we render the Pydantic model's JSON Schema (sanitised for
    Ollama) and place it in a system message ahead of everything the
    caller supplied, so the model sees exactly what to emit.  The caller's
    own messages are left untouched and keep their order.
    """
    from json import dumps

    rendered_schema = dumps(
        _sanitise_schema_for_ollama(response_schema.model_json_schema()),
        indent=2,
    )
    instruction = (
        "Respond ONLY with a single JSON object matching this JSON Schema "
        "exactly. No prose, no code fences, no explanations. All top-level "
        "properties listed in `required` MUST be present. Use null for "
        "fields you cannot confidently extract. The JSON Schema:\n"
        f"{rendered_schema}"
    )
    schema_turn = {"role": "system", "content": instruction}
    return [schema_turn, *messages]
def _sanitise_schema_for_ollama(schema: Any) -> Any: def _sanitise_schema_for_ollama(schema: Any) -> Any:
"""Strip null branches from ``anyOf`` unions. """Strip null branches from ``anyOf`` unions.

View file

@ -85,7 +85,12 @@ class TestInvokeHappyPath:
assert body_json["format"] == "json" assert body_json["format"] == "json"
assert body_json["options"]["temperature"] == 0.2 assert body_json["options"]["temperature"] == 0.2
assert "reasoning_effort" not in body_json assert "reasoning_effort" not in body_json
assert body_json["messages"] == [ # A schema-guidance system message is prepended to the caller's
# messages so Ollama (format=json loose mode) emits the right shape.
msgs = body_json["messages"]
assert msgs[0]["role"] == "system"
assert "JSON Schema" in msgs[0]["content"]
assert msgs[1:] == [
{"role": "system", "content": "You extract."}, {"role": "system", "content": "You extract."},
{"role": "user", "content": "Doc body"}, {"role": "user", "content": "Doc body"},
] ]
@ -119,7 +124,10 @@ class TestInvokeHappyPath:
import json import json
request_body = json.loads(httpx_mock.get_requests()[0].read()) request_body = json.loads(httpx_mock.get_requests()[0].read())
assert request_body["messages"] == [ # First message is the auto-injected schema guidance; after that
# the caller's user message has its text parts joined.
assert request_body["messages"][0]["role"] == "system"
assert request_body["messages"][1:] == [
{"role": "user", "content": "part-a\npart-b"} {"role": "user", "content": "part-a\npart-b"}
] ]