fix(genai): sanitise Optional for Ollama (#37)

fix(genai): strip null branches from anyOf before sending to Ollama
Ollama 0.11.8's llama.cpp structured-output implementation segfaults on Pydantic v2's standard Optional pattern: {"anyOf": [{"type": "string"}, {"type": "null"}]}. Confirmed on the deploy host: an /api/chat request with the MVP's ProvenanceWrappedResponse schema crashed Ollama with SIGSEGV; the client saw httpx RemoteProtocolError → IX_002_000. The new _sanitise_schema_for_ollama walks the schema recursively and drops {"type": "null"} branches from every anyOf. Single-branch unions are inlined so sibling keys (default, title) survive. This only narrows what the LLM is *told* it may emit; Pydantic still validates the real response body against the original schema and accepts None for Optional fields if they were absent or explicitly null. Existing unit tests were updated: the "happy path" test no longer pins the format to `_Schema.model_json_schema()` verbatim — instead it asserts the sanitisation effect on a known-Optional field. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 11:48:43 +00:00 · 2026-04-18 13:48:26 +02:00
2 changed files with 58 additions and 2 deletions
--- a/src/ix/genai/ollama_client.py
+++ b/src/ix/genai/ollama_client.py
@ -168,7 +168,9 @@ class OllamaClient:
            "model": request_kwargs.get("model"),
            "messages": messages,
            "stream": False,
-            "format": response_schema.model_json_schema(),
+            "format": _sanitise_schema_for_ollama(
+                response_schema.model_json_schema()
+            ),
        }

        options: dict[str, Any] = {}
@ -200,4 +202,52 @@ class OllamaClient:
        return out


+def _sanitise_schema_for_ollama(schema: Any) -> Any:
+    """Strip null branches from ``anyOf`` unions.
+
+    Ollama 0.11.8's llama.cpp structured-output implementation segfaults on
+    Pydantic v2's standard Optional pattern::
+
+        {"anyOf": [{"type": "string"}, {"type": "null"}]}
+
+    We collapse any ``anyOf`` that includes a ``{"type": "null"}`` entry to
+    its non-null branch — single branch becomes that branch inline; multiple
+    branches keep the union without null. This only narrows what the LLM is
+    *told* it may emit; Pydantic still validates the real response and can
+    accept ``None`` at parse time if the field is ``Optional``.
+
+    Walk is recursive and structure-preserving. Other ``anyOf`` shapes (e.g.
+    polymorphic unions without null) are left alone.
+    """
+    if isinstance(schema, dict):
+        cleaned: dict[str, Any] = {}
+        for key, value in schema.items():
+            if key == "anyOf" and isinstance(value, list):
+                non_null = [
+                    _sanitise_schema_for_ollama(branch)
+                    for branch in value
+                    if not (isinstance(branch, dict) and branch.get("type") == "null")
+                ]
+                if len(non_null) == 1:
+                    # Inline the single remaining branch. Parent keys win on a clash:
+                    # ``setdefault`` keeps keys already copied; later siblings overwrite.
+                    only = non_null[0]
+                    if isinstance(only, dict):
+                        for ok, ov in only.items():
+                            cleaned.setdefault(ok, ov)
+                    else:
+                        cleaned[key] = non_null  # non-dict branch: keep one-item union
+                elif len(non_null) == 0:
+                    # Pathological: no non-null branch left. Fall back to a plain string.
+                    cleaned["type"] = "string"
+                else:
+                    cleaned[key] = non_null  # 2+ branches: keep the union, minus null
+            else:
+                cleaned[key] = _sanitise_schema_for_ollama(value)  # recurse into value
+        return cleaned
+    if isinstance(schema, list):
+        return [_sanitise_schema_for_ollama(item) for item in schema]
+    return schema
+
+
 __all__ = ["OllamaClient"]
--- a/tests/unit/test_ollama_client.py
+++ b/tests/unit/test_ollama_client.py
@ -79,7 +79,13 @@ class TestInvokeHappyPath:
        body_json = json.loads(body)
        assert body_json["model"] == "gpt-oss:20b"
        assert body_json["stream"] is False
-        assert body_json["format"] == _Schema.model_json_schema()
+        # Format is the pydantic schema with Optional `anyOf [T, null]`
+        # patterns collapsed to just T — Ollama 0.11.8 segfaults on the
+        # anyOf+null shape, so we sanitise before sending.
+        fmt = body_json["format"]
+        assert fmt["properties"]["bank_name"] == {"title": "Bank Name", "type": "string"}
+        assert fmt["properties"]["account_number"]["type"] == "string"
+        assert "anyOf" not in fmt["properties"]["account_number"]
        assert body_json["options"]["temperature"] == 0.2
        assert "reasoning_effort" not in body_json
        assert body_json["messages"] == [