Compare commits

..

No commits in common. "763407ba1c02ccec337533214bcd4c08137c17aa" and "9c7389531813bb4363071dd8a9be395b6ff29c49" have entirely different histories.

2 changed files with 12 additions and 52 deletions

View file

@@ -162,26 +162,22 @@ class OllamaClient:
"""Map provider-neutral kwargs to Ollama's /api/chat body.
Schema strategy for Ollama 0.11.8: we pass ``format="json"`` (loose
JSON mode) and bake the Pydantic schema into a system message
ahead of the caller's own system prompt. Rationale:
JSON mode) rather than the full Pydantic schema. The llama.cpp
structured-output implementation in 0.11.8 segfaults on schemas
involving ``anyOf``, ``$ref``, or ``pattern`` which Pydantic v2
emits for Optional / nested-model / Decimal fields.
* The full Pydantic schema as ``format=<schema>`` crashes llama.cpp's
structured-output implementation (SIGSEGV) on every non-trivial
shape ``anyOf`` / ``$ref`` / ``pattern`` all trigger it.
* ``format="json"`` alone guarantees valid JSON but not the shape;
models routinely return ``{}`` when not told what fields to emit.
* Injecting the schema into the prompt is the cheapest way to
get both: the model sees the expected shape explicitly, Pydantic
validates the response at parse time (IX_002_001 on mismatch).
Non-Ollama ``GenAIClient`` impls can ignore this behaviour and use
native structured-output (``response_format`` on OpenAI, etc.).
In loose JSON mode Ollama guarantees only syntactically-valid
JSON; we enforce the schema on our side by catching the Pydantic
``ValidationError`` at parse time and raising IX_002_001. The
system prompt (built upstream in GenAIStep) already tells the
model what JSON shape to emit, so loose mode is the right
abstraction layer here.
"""
messages = self._translate_messages(
list(request_kwargs.get("messages") or [])
)
messages = _inject_schema_system_message(messages, response_schema)
body: dict[str, Any] = {
"model": request_kwargs.get("model"),
"messages": messages,
@@ -218,34 +214,6 @@ class OllamaClient:
return out
def _inject_schema_system_message(
    messages: list[dict[str, Any]],
    response_schema: type[BaseModel],
) -> list[dict[str, Any]]:
    """Return *messages* with a schema-pinning system turn prepended.

    Ollama's ``format="json"`` mode only guarantees syntactically valid
    JSON — it says nothing about field names or which keys appear. To
    steer the model toward the expected shape, we serialise the Pydantic
    schema (sanitised for Ollama) and place it in a system message ahead
    of everything the caller supplied, including any system message of
    their own.
    """
    import json as _json

    rendered_schema = _json.dumps(
        _sanitise_schema_for_ollama(response_schema.model_json_schema()),
        indent=2,
    )
    # Assembled from fragments so the final string is byte-identical to
    # the documented instruction text.
    instruction = "".join(
        [
            "Respond ONLY with a single JSON object matching this JSON Schema ",
            "exactly. No prose, no code fences, no explanations. All top-level ",
            "properties listed in `required` MUST be present. Use null for ",
            "fields you cannot confidently extract. The JSON Schema:\n",
            rendered_schema,
        ]
    )
    schema_turn = {"role": "system", "content": instruction}
    return [schema_turn, *messages]
def _sanitise_schema_for_ollama(schema: Any) -> Any:
"""Strip null branches from ``anyOf`` unions.

View file

@@ -85,12 +85,7 @@ class TestInvokeHappyPath:
assert body_json["format"] == "json"
assert body_json["options"]["temperature"] == 0.2
assert "reasoning_effort" not in body_json
# A schema-guidance system message is prepended to the caller's
# messages so Ollama (format=json loose mode) emits the right shape.
msgs = body_json["messages"]
assert msgs[0]["role"] == "system"
assert "JSON Schema" in msgs[0]["content"]
assert msgs[1:] == [
assert body_json["messages"] == [
{"role": "system", "content": "You extract."},
{"role": "user", "content": "Doc body"},
]
@@ -124,10 +119,7 @@ class TestInvokeHappyPath:
import json
request_body = json.loads(httpx_mock.get_requests()[0].read())
# First message is the auto-injected schema guidance; after that
# the caller's user message has its text parts joined.
assert request_body["messages"][0]["role"] == "system"
assert request_body["messages"][1:] == [
assert request_body["messages"] == [
{"role": "user", "content": "part-a\npart-b"}
]