From 2efc4d108825f698aeb5273be06a745acf3471c6 Mon Sep 17 00:00:00 2001 From: Dirk Riemann Date: Sat, 18 Apr 2026 13:59:04 +0200 Subject: [PATCH] fix(genai): send format="json" (loose mode) to Ollama MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ollama 0.11.8 segfaults on any Pydantic-shaped structured-output schema with $ref, anyOf, or pattern — confirmed on the deploy host with the simplest MVP case (BankStatementHeader alone). The earlier null-stripping sanitiser wasn't enough. Switch to format="json", which is "emit valid JSON" mode. We're already describing the exact JSON shape in the system prompt (via GenAIStep + the use case's citation instruction appendix) and validating the response body through Pydantic on parse — which raises IX_002_001 on schema mismatch, exactly as before. Stronger guarantees can come back later via a newer Ollama, an API fix, or a different GenAIClient impl. None of that is needed for the MVP to work end to end. Unit tests: the sanitiser is left in place (harmless, still tested). The "happy path" test now asserts format == "json". Co-Authored-By: Claude Opus 4.7 (1M context) --- src/ix/genai/ollama_client.py | 20 ++++++++++++++++---- tests/unit/test_ollama_client.py | 11 ++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/ix/genai/ollama_client.py b/src/ix/genai/ollama_client.py index c6d100f..85578bc 100644 --- a/src/ix/genai/ollama_client.py +++ b/src/ix/genai/ollama_client.py @@ -159,7 +159,21 @@ class OllamaClient: request_kwargs: dict[str, Any], response_schema: type[BaseModel], ) -> dict[str, Any]: - """Map provider-neutral kwargs to Ollama's /api/chat body.""" + """Map provider-neutral kwargs to Ollama's /api/chat body. + + Schema strategy for Ollama 0.11.8: we pass ``format="json"`` (loose + JSON mode) rather than the full Pydantic schema. 
The llama.cpp + structured-output implementation in 0.11.8 segfaults on schemas + involving ``anyOf``, ``$ref``, or ``pattern`` — which Pydantic v2 + emits for Optional / nested-model / Decimal fields. + + In loose JSON mode Ollama guarantees only syntactically-valid + JSON; we enforce the schema on our side by catching the Pydantic + ``ValidationError`` at parse time and raising IX_002_001. The + system prompt (built upstream in GenAIStep) already tells the + model what JSON shape to emit, so loose mode is the right + abstraction layer here. + """ messages = self._translate_messages( list(request_kwargs.get("messages") or []) @@ -168,9 +182,7 @@ class OllamaClient: "model": request_kwargs.get("model"), "messages": messages, "stream": False, - "format": _sanitise_schema_for_ollama( - response_schema.model_json_schema() - ), + "format": "json", } options: dict[str, Any] = {} diff --git a/tests/unit/test_ollama_client.py b/tests/unit/test_ollama_client.py index e68df4e..6a5f203 100644 --- a/tests/unit/test_ollama_client.py +++ b/tests/unit/test_ollama_client.py @@ -79,13 +79,10 @@ class TestInvokeHappyPath: body_json = json.loads(body) assert body_json["model"] == "gpt-oss:20b" assert body_json["stream"] is False - # Format is the pydantic schema with Optional `anyOf [T, null]` - # patterns collapsed to just T — Ollama 0.11.8 segfaults on the - # anyOf+null shape, so we sanitise before sending. - fmt = body_json["format"] - assert fmt["properties"]["bank_name"] == {"title": "Bank Name", "type": "string"} - assert fmt["properties"]["account_number"]["type"] == "string" - assert "anyOf" not in fmt["properties"]["account_number"] + # format is "json" (loose mode): Ollama 0.11.8 segfaults on full + # Pydantic schemas. We pass the schema via the system prompt + # upstream and validate on parse. + assert body_json["format"] == "json" assert body_json["options"]["temperature"] == 0.2 assert "reasoning_effort" not in body_json assert body_json["messages"] == [