fix(genai): extract trailing JSON (#41)
Some checks are pending
tests / test (push) Waiting to run
Some checks are pending
tests / test (push) Waiting to run
This commit is contained in:
commit
95a576f744
2 changed files with 50 additions and 6 deletions
|
|
@ -96,8 +96,9 @@ class OllamaClient:
|
||||||
) from exc
|
) from exc
|
||||||
|
|
||||||
content = (payload.get("message") or {}).get("content") or ""
|
content = (payload.get("message") or {}).get("content") or ""
|
||||||
|
json_blob = _extract_json_blob(content)
|
||||||
try:
|
try:
|
||||||
parsed = response_schema.model_validate_json(content)
|
parsed = response_schema.model_validate_json(json_blob)
|
||||||
except ValidationError as exc:
|
except ValidationError as exc:
|
||||||
raise IXException(
|
raise IXException(
|
||||||
IXErrorCode.IX_002_001,
|
IXErrorCode.IX_002_001,
|
||||||
|
|
@ -186,7 +187,12 @@ class OllamaClient:
|
||||||
"model": request_kwargs.get("model"),
|
"model": request_kwargs.get("model"),
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
"stream": False,
|
"stream": False,
|
||||||
"format": "json",
|
# NOTE: format is deliberately omitted. `format="json"` made
|
||||||
|
# reasoning models (qwen3) abort after emitting `{}` because the
|
||||||
|
# constrained sampler terminated before the chain-of-thought
|
||||||
|
# finished; `format=<schema>` segfaulted Ollama 0.11.8. Letting
|
||||||
|
# the model stream freely and then extracting the trailing JSON
|
||||||
|
# blob works for both reasoning and non-reasoning models.
|
||||||
}
|
}
|
||||||
|
|
||||||
options: dict[str, Any] = {}
|
options: dict[str, Any] = {}
|
||||||
|
|
@ -218,6 +224,43 @@ class OllamaClient:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_json_blob(text: str) -> str:
|
||||||
|
"""Return the outermost balanced JSON object in ``text``.
|
||||||
|
|
||||||
|
Reasoning models (qwen3, deepseek-r1) wrap their real answer in
|
||||||
|
``<think>…</think>`` blocks. Other models sometimes prefix prose or
|
||||||
|
fence the JSON in ```json``` code blocks. Finding the last balanced
|
||||||
|
``{…}`` is the cheapest robust parse that works for all three shapes;
|
||||||
|
a malformed response yields the full text and Pydantic catches it
|
||||||
|
downstream as ``IX_002_001``.
|
||||||
|
"""
|
||||||
|
start = text.find("{")
|
||||||
|
if start < 0:
|
||||||
|
return text
|
||||||
|
depth = 0
|
||||||
|
in_string = False
|
||||||
|
escaped = False
|
||||||
|
for i in range(start, len(text)):
|
||||||
|
ch = text[i]
|
||||||
|
if in_string:
|
||||||
|
if escaped:
|
||||||
|
escaped = False
|
||||||
|
elif ch == "\\":
|
||||||
|
escaped = True
|
||||||
|
elif ch == '"':
|
||||||
|
in_string = False
|
||||||
|
continue
|
||||||
|
if ch == '"':
|
||||||
|
in_string = True
|
||||||
|
elif ch == "{":
|
||||||
|
depth += 1
|
||||||
|
elif ch == "}":
|
||||||
|
depth -= 1
|
||||||
|
if depth == 0:
|
||||||
|
return text[start : i + 1]
|
||||||
|
return text[start:]
|
||||||
|
|
||||||
|
|
||||||
def _inject_schema_system_message(
|
def _inject_schema_system_message(
|
||||||
messages: list[dict[str, Any]],
|
messages: list[dict[str, Any]],
|
||||||
response_schema: type[BaseModel],
|
response_schema: type[BaseModel],
|
||||||
|
|
|
||||||
|
|
@ -79,10 +79,11 @@ class TestInvokeHappyPath:
|
||||||
body_json = json.loads(body)
|
body_json = json.loads(body)
|
||||||
assert body_json["model"] == "gpt-oss:20b"
|
assert body_json["model"] == "gpt-oss:20b"
|
||||||
assert body_json["stream"] is False
|
assert body_json["stream"] is False
|
||||||
# format is "json" (loose mode): Ollama 0.11.8 segfaults on full
|
# No `format` is sent: Ollama 0.11.8 segfaults on full schemas and
|
||||||
# Pydantic schemas. We pass the schema via the system prompt
|
# aborts to `{}` with `format=json` on reasoning models. Schema is
|
||||||
# upstream and validate on parse.
|
# injected into the system prompt instead; we extract the trailing
|
||||||
assert body_json["format"] == "json"
|
# JSON blob from the response and validate via Pydantic.
|
||||||
|
assert "format" not in body_json
|
||||||
assert body_json["options"]["temperature"] == 0.2
|
assert body_json["options"]["temperature"] == 0.2
|
||||||
assert "reasoning_effort" not in body_json
|
assert "reasoning_effort" not in body_json
|
||||||
# A schema-guidance system message is prepended to the caller's
|
# A schema-guidance system message is prepended to the caller's
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue