Compare commits

...

2 commits

Author SHA1 Message Date
763407ba1c fix(genai): schema in prompt (#40)
Some checks failed
tests / test (push) Has been cancelled
2026-04-18 12:02:38 +00:00
34f8268cd5 fix(genai): inject JSON schema into Ollama system prompt
All checks were successful
tests / test (push) Successful in 1m8s
tests / test (pull_request) Successful in 1m18s
format=json loose mode gives valid JSON but no shape — models default
to emitting {} when the system prompt doesn't list fields. Prepend a
schema-guidance system message with the full Pydantic schema (after
the existing null-branch sanitiser) so the model sees exactly what
shape to produce. Pydantic still validates on parse.

Unit tests updated to check the schema message is prepended without
disturbing the caller's own messages.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 14:02:25 +02:00
2 changed files with 52 additions and 12 deletions

View file

@ -162,22 +162,26 @@ class OllamaClient:
"""Map provider-neutral kwargs to Ollama's /api/chat body.
Schema strategy for Ollama 0.11.8: we pass ``format="json"`` (loose
JSON mode) rather than the full Pydantic schema. The llama.cpp
structured-output implementation in 0.11.8 segfaults on schemas
involving ``anyOf``, ``$ref``, or ``pattern`` which Pydantic v2
emits for Optional / nested-model / Decimal fields.
JSON mode) and bake the Pydantic schema into a system message
ahead of the caller's own system prompt. Rationale:
In loose JSON mode Ollama guarantees only syntactically-valid
JSON; we enforce the schema on our side by catching the Pydantic
``ValidationError`` at parse time and raising IX_002_001. The
system prompt (built upstream in GenAIStep) already tells the
model what JSON shape to emit, so loose mode is the right
abstraction layer here.
* The full Pydantic schema as ``format=<schema>`` crashes llama.cpp's
structured-output implementation (SIGSEGV) on every non-trivial
shape: ``anyOf`` / ``$ref`` / ``pattern`` all trigger it.
* ``format="json"`` alone guarantees valid JSON but not the shape;
models routinely return ``{}`` when not told what fields to emit.
* Injecting the schema into the prompt is the cheapest way to
get both: the model sees the expected shape explicitly, Pydantic
validates the response at parse time (IX_002_001 on mismatch).
Non-Ollama ``GenAIClient`` impls can ignore this behaviour and use
native structured-output (``response_format`` on OpenAI, etc.).
"""
messages = self._translate_messages(
list(request_kwargs.get("messages") or [])
)
messages = _inject_schema_system_message(messages, response_schema)
body: dict[str, Any] = {
"model": request_kwargs.get("model"),
"messages": messages,
@ -214,6 +218,34 @@ class OllamaClient:
return out
def _inject_schema_system_message(
    messages: list[dict[str, Any]],
    response_schema: type[BaseModel],
) -> list[dict[str, Any]]:
    """Return ``messages`` with a schema-guidance system turn placed first.

    Ollama's ``format="json"`` mode guarantees valid JSON but not the
    field set or names, so the expected shape is spelled out in the
    prompt instead: the JSON Schema of ``response_schema`` is passed
    through ``_sanitise_schema_for_ollama``, rendered as indented JSON
    and appended to a fixed instruction text.

    The guidance turn is always inserted at index 0, ahead of any system
    message the caller supplied. A new list is returned; the ``messages``
    argument itself is not modified.
    """
    # Function-scope import, as in the original: no reliance on a
    # module-level ``json`` import being present.
    import json

    sanitised_schema = _sanitise_schema_for_ollama(
        response_schema.model_json_schema()
    )
    rendered_schema = json.dumps(sanitised_schema, indent=2)

    # Fixed instruction text; the rendered schema follows the trailing
    # newline directly.
    preamble = (
        "Respond ONLY with a single JSON object matching this JSON Schema exactly. "
        "No prose, no code fences, no explanations. "
        "All top-level properties listed in `required` MUST be present. "
        "Use null for fields you cannot confidently extract. "
        "The JSON Schema:\n"
    )
    schema_turn = {"role": "system", "content": preamble + rendered_schema}

    return [schema_turn] + list(messages)
def _sanitise_schema_for_ollama(schema: Any) -> Any:
"""Strip null branches from ``anyOf`` unions.

View file

@ -85,7 +85,12 @@ class TestInvokeHappyPath:
assert body_json["format"] == "json"
assert body_json["options"]["temperature"] == 0.2
assert "reasoning_effort" not in body_json
assert body_json["messages"] == [
# A schema-guidance system message is prepended to the caller's
# messages so Ollama (format=json loose mode) emits the right shape.
msgs = body_json["messages"]
assert msgs[0]["role"] == "system"
assert "JSON Schema" in msgs[0]["content"]
assert msgs[1:] == [
{"role": "system", "content": "You extract."},
{"role": "user", "content": "Doc body"},
]
@ -119,7 +124,10 @@ class TestInvokeHappyPath:
import json
request_body = json.loads(httpx_mock.get_requests()[0].read())
assert request_body["messages"] == [
# First message is the auto-injected schema guidance; after that
# the caller's user message has its text parts joined.
assert request_body["messages"][0]["role"] == "system"
assert request_body["messages"][1:] == [
{"role": "user", "content": "part-a\npart-b"}
]