# qwen3:14b (and deepseek-r1, other reasoning models) wrap their output in
# <think>…</think> chains-of-thought before emitting real output. With
# format=json the constrained sampler terminated immediately at `{}` because
# the thinking block wasn't valid JSON; without format the model thinks
# normally and appends the actual JSON at the end. OllamaClient now omits the
# format flag and extracts the outermost balanced `{…}` block from the
# response (brace-depth counter, string-literal aware). Works for reasoning
# models, ```json```-code-fenced outputs, and plain JSON alike.
# Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
"""Tests for :class:`OllamaClient` — hermetic, pytest-httpx-driven.
|
|
|
|
Covers spec §6 GenAIStep Ollama call contract:
|
|
|
|
* POST body shape (model / messages / format / stream / options).
|
|
* Response parsing → :class:`GenAIInvocationResult`.
|
|
* Error mapping: connection / timeout / 5xx → ``IX_002_000``;
|
|
schema-violating body → ``IX_002_001``.
|
|
* ``selfcheck()``: tags-reachable + model-listed → ``ok``;
|
|
reachable-but-missing → ``degraded``; unreachable → ``fail``.
|
|
"""
|
|
|
|
from __future__ import annotations

import json

import httpx
import pytest
from pydantic import BaseModel
from pytest_httpx import HTTPXMock

from ix.errors import IXErrorCode, IXException
from ix.genai.ollama_client import OllamaClient


|
class _Schema(BaseModel):
    """Trivial structured-output schema for the round-trip tests."""

    # Required field: omitting it from a response exercises the
    # schema-violation (IX_002_001) error path.
    bank_name: str
    # Optional field with a None default: exercises optional handling.
    account_number: str | None = None


def _ollama_chat_ok_body(content_json: str) -> dict:
|
|
"""Build a minimal Ollama /api/chat success body."""
|
|
return {
|
|
"model": "gpt-oss:20b",
|
|
"message": {"role": "assistant", "content": content_json},
|
|
"done": True,
|
|
"eval_count": 42,
|
|
"prompt_eval_count": 17,
|
|
}
|
|
|
|
|
|
class TestInvokeHappyPath:
    """Successful ``invoke()`` round-trips: request shape and response parsing."""

    async def test_posts_to_chat_endpoint_with_format_and_no_stream(
        self, httpx_mock: HTTPXMock
    ) -> None:
        # NOTE: historical name — the client deliberately sends NO `format`
        # field; its absence is asserted below.
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            json=_ollama_chat_ok_body('{"bank_name":"DKB","account_number":"DE89"}'),
        )

        client = OllamaClient(
            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
        )
        result = await client.invoke(
            request_kwargs={
                "model": "gpt-oss:20b",
                "messages": [
                    {"role": "system", "content": "You extract."},
                    {"role": "user", "content": "Doc body"},
                ],
                "temperature": 0.2,
                "reasoning_effort": "high",  # dropped silently
            },
            response_schema=_Schema,
        )

        assert result.parsed == _Schema(bank_name="DKB", account_number="DE89")
        assert result.model_name == "gpt-oss:20b"
        assert result.usage.prompt_tokens == 17
        assert result.usage.completion_tokens == 42

        # Verify request shape.
        requests = httpx_mock.get_requests()
        assert len(requests) == 1
        body_json = json.loads(requests[0].read().decode())
        assert body_json["model"] == "gpt-oss:20b"
        assert body_json["stream"] is False
        # No `format` is sent: Ollama 0.11.8 segfaults on full schemas and
        # aborts to `{}` with `format=json` on reasoning models. Schema is
        # injected into the system prompt instead; we extract the trailing
        # JSON blob from the response and validate via Pydantic.
        assert "format" not in body_json
        assert body_json["options"]["temperature"] == 0.2
        assert "reasoning_effort" not in body_json
        # A schema-guidance system message is prepended to the caller's
        # messages so the model emits the expected JSON shape.
        msgs = body_json["messages"]
        assert msgs[0]["role"] == "system"
        assert "JSON Schema" in msgs[0]["content"]
        assert msgs[1:] == [
            {"role": "system", "content": "You extract."},
            {"role": "user", "content": "Doc body"},
        ]

    async def test_text_parts_content_list_is_joined(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            json=_ollama_chat_ok_body('{"bank_name":"X"}'),
        )
        client = OllamaClient(
            base_url="http://ollama.test:11434", per_call_timeout_s=5.0
        )
        await client.invoke(
            request_kwargs={
                "model": "gpt-oss:20b",
                "messages": [
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": "part-a"},
                            {"type": "text", "text": "part-b"},
                        ],
                    }
                ],
            },
            response_schema=_Schema,
        )
        request_body = json.loads(httpx_mock.get_requests()[0].read())
        # First message is the auto-injected schema guidance; after that
        # the caller's user message has its text parts joined.
        assert request_body["messages"][0]["role"] == "system"
        assert request_body["messages"][1:] == [
            {"role": "user", "content": "part-a\npart-b"}
        ]


class TestInvokeErrorPaths:
    """Transport and schema failures map onto the documented IX error codes."""

    @staticmethod
    def _client(timeout: float) -> OllamaClient:
        # Shared construction helper; every test targets the same fake host.
        return OllamaClient(
            base_url="http://ollama.test:11434", per_call_timeout_s=timeout
        )

    @staticmethod
    async def _invoke(client: OllamaClient) -> None:
        # Minimal single-message invocation used by every error-path test.
        await client.invoke(
            request_kwargs={
                "model": "gpt-oss:20b",
                "messages": [{"role": "user", "content": "hi"}],
            },
            response_schema=_Schema,
        )

    async def test_connection_error_maps_to_002_000(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_exception(httpx.ConnectError("refused"))
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(1.0))
        assert ei.value.code is IXErrorCode.IX_002_000

    async def test_read_timeout_maps_to_002_000(self, httpx_mock: HTTPXMock) -> None:
        httpx_mock.add_exception(httpx.ReadTimeout("slow"))
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(0.5))
        assert ei.value.code is IXErrorCode.IX_002_000

    async def test_500_maps_to_002_000_with_body_snippet(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            status_code=500,
            text="boom boom server broken",
        )
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(5.0))
        assert ei.value.code is IXErrorCode.IX_002_000
        # The error detail carries a snippet of the server body for triage.
        assert "boom" in (ei.value.detail or "")

    async def test_200_with_invalid_json_maps_to_002_001(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            json=_ollama_chat_ok_body("not-json"),
        )
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(5.0))
        assert ei.value.code is IXErrorCode.IX_002_001

    async def test_200_with_schema_violation_maps_to_002_001(
        self, httpx_mock: HTTPXMock
    ) -> None:
        # Missing required `bank_name` field.
        httpx_mock.add_response(
            url="http://ollama.test:11434/api/chat",
            method="POST",
            json=_ollama_chat_ok_body('{"account_number":"DE89"}'),
        )
        with pytest.raises(IXException) as ei:
            await self._invoke(self._client(5.0))
        assert ei.value.code is IXErrorCode.IX_002_001


class TestSelfcheck:
    """``selfcheck()`` health triage: ``ok`` / ``degraded`` / ``fail``."""

    _BASE = "http://ollama.test:11434"

    def _client(self) -> OllamaClient:
        # All selfcheck tests share one client configuration.
        return OllamaClient(base_url=self._BASE, per_call_timeout_s=5.0)

    async def test_selfcheck_ok_when_model_listed(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_response(
            url=f"{self._BASE}/api/tags",
            method="GET",
            json={"models": [{"name": "gpt-oss:20b"}, {"name": "qwen2.5:32b"}]},
        )
        status = await self._client().selfcheck(expected_model="gpt-oss:20b")
        assert status == "ok"

    async def test_selfcheck_degraded_when_model_missing(
        self, httpx_mock: HTTPXMock
    ) -> None:
        # Server reachable, but the expected model is not in the tag list.
        httpx_mock.add_response(
            url=f"{self._BASE}/api/tags",
            method="GET",
            json={"models": [{"name": "qwen2.5:32b"}]},
        )
        status = await self._client().selfcheck(expected_model="gpt-oss:20b")
        assert status == "degraded"

    async def test_selfcheck_fail_on_connection_error(
        self, httpx_mock: HTTPXMock
    ) -> None:
        httpx_mock.add_exception(httpx.ConnectError("refused"))
        status = await self._client().selfcheck(expected_model="gpt-oss:20b")
        assert status == "fail"