Runs Surya's detection + recognition over PIL images rendered from each Page's source file (PDFs via PyMuPDF, images via Pillow). Lazy warm_up so FastAPI lifespan start stays predictable. Deferred Surya/torch imports keep the base install slim — the heavy deps stay under [ocr]. Extends OCRClient Protocol with optional files + page_metadata kwargs so the engine can resolve each page back to its on-disk source; Fake accepts-and-ignores to keep hermetic tests unchanged. selfcheck() runs the predictors on a 1x1 PIL image — wired into /healthz by Task 4.3. Tests: 6 hermetic unit tests (Surya predictors mocked, no model download); 2 live tests gated on IX_TEST_OLLAMA=1 (never run in CI). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
166 lines
5.8 KiB
Python
166 lines
5.8 KiB
Python
"""Tests for :class:`SuryaOCRClient` — hermetic, no model download.
|
|
|
|
The real Surya predictors are patched out with :class:`unittest.mock.MagicMock`
|
|
that return trivially-shaped line objects. The tests assert the client's
|
|
translation layer — flattening polygons, mapping text_lines → ``Line``,
|
|
preserving ``page_no``/``width``/``height`` per input page.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from pathlib import Path
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from ix.contracts import Page
|
|
from ix.ocr.surya_client import SuryaOCRClient
|
|
from ix.segmentation import PageMetadata
|
|
|
|
|
|
def _make_surya_line(text: str, polygon: list[list[float]]) -> SimpleNamespace:
    """Build a duck-typed stand-in for ``surya.recognition.schema.TextLine``.

    The returned namespace exposes ``text`` and ``polygon`` exactly as given,
    plus a fixed ``confidence`` of 0.95.
    """
    fake_line = SimpleNamespace(text=text, polygon=polygon, confidence=0.95)
    return fake_line
|
|
|
|
|
|
def _make_surya_ocr_result(lines: list[SimpleNamespace]) -> SimpleNamespace:
    """Build a duck-typed stand-in for ``surya.recognition.schema.OCRResult``.

    ``lines`` is stored as ``text_lines`` without copying; ``image_bbox`` is
    always ``[0, 0, 100, 100]``.
    """
    fake_result = SimpleNamespace(text_lines=lines, image_bbox=[0, 0, 100, 100])
    return fake_result
|
|
|
|
|
|
class TestOCRBuildsOCRResultFromMockedPredictors:
    """Drive ``SuryaOCRClient.ocr`` with the Surya predictors replaced by mocks."""

    async def test_one_image_one_line_flatten_polygon(self, tmp_path: Path) -> None:
        """One mocked line comes back with its four-point polygon flattened."""
        png = tmp_path / "a.png"
        _write_tiny_png(png)

        recognised = _make_surya_ocr_result(
            [
                _make_surya_line(
                    text="hello",
                    polygon=[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]],
                )
            ]
        )

        ocr_client = SuryaOCRClient()
        # Skip the real warm_up; inject the mocks directly.
        ocr_client._recognition_predictor = MagicMock(return_value=[recognised])
        ocr_client._detection_predictor = MagicMock()

        response = await ocr_client.ocr(
            [Page(page_no=1, width=100.0, height=50.0, lines=[])],
            files=[(png, "image/png")],
            page_metadata=[PageMetadata(file_index=0)],
        )

        assert len(response.result.pages) == 1
        first_page = response.result.pages[0]
        # Page identity and geometry must be carried over from the input page.
        assert first_page.page_no == 1
        assert first_page.width == 100.0
        assert first_page.height == 50.0
        assert len(first_page.lines) == 1
        assert first_page.lines[0].text == "hello"
        assert first_page.lines[0].bounding_box == [
            1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0
        ]

    async def test_multiple_pages_preserves_order(self, tmp_path: Path) -> None:
        """Two input pages yield two output pages in the same order."""
        sources = [tmp_path / "a.png", tmp_path / "b.png"]
        for source in sources:
            _write_tiny_png(source)

        # One mocked OCR result per page; each line gets its own polygon list.
        per_page_results = [
            _make_surya_ocr_result(
                [_make_surya_line(label, [[0, 0], [1, 0], [1, 1], [0, 1]])]
            )
            for label in ("a-line", "b-line")
        ]

        ocr_client = SuryaOCRClient()
        ocr_client._recognition_predictor = MagicMock(return_value=per_page_results)
        ocr_client._detection_predictor = MagicMock()

        response = await ocr_client.ocr(
            [Page(page_no=number, width=10.0, height=20.0, lines=[]) for number in (1, 2)],
            files=[(source, "image/png") for source in sources],
            page_metadata=[PageMetadata(file_index=index) for index in (0, 1)],
        )

        first_line_texts = [page.lines[0].text for page in response.result.pages]
        assert first_line_texts == ["a-line", "b-line"]

    async def test_lazy_warm_up_on_first_ocr(self, tmp_path: Path) -> None:
        """``ocr`` on a fresh client triggers exactly one ``warm_up`` call."""
        png = tmp_path / "x.png"
        _write_tiny_png(png)

        ocr_client = SuryaOCRClient()

        def install_mock_predictors() -> None:
            """Stand-in for ``warm_up``: assign both predictors on the client."""
            canned = _make_surya_ocr_result(
                [_make_surya_line("hi", [[0, 0], [1, 0], [1, 1], [0, 1]])]
            )
            ocr_client._recognition_predictor = MagicMock(return_value=[canned])
            ocr_client._detection_predictor = MagicMock()

        # Patch warm_up on this instance so no real Surya module loading happens.
        with patch.object(ocr_client, "warm_up", autospec=True) as warm_up_spy:
            warm_up_spy.side_effect = install_mock_predictors

            await ocr_client.ocr(
                [Page(page_no=1, width=10.0, height=10.0, lines=[])],
                files=[(png, "image/png")],
                page_metadata=[PageMetadata(file_index=0)],
            )
            warm_up_spy.assert_called_once()
|
|
|
|
|
|
class TestSelfcheck:
    """``SuryaOCRClient.selfcheck`` reports ``"ok"`` / ``"fail"`` with mocked predictors."""

    async def test_selfcheck_ok_with_mocked_predictors(self) -> None:
        """A recognition predictor that returns normally yields ``"ok"``."""
        empty_result = _make_surya_ocr_result([])

        ocr_client = SuryaOCRClient()
        ocr_client._recognition_predictor = MagicMock(return_value=[empty_result])
        ocr_client._detection_predictor = MagicMock()

        status = await ocr_client.selfcheck()
        assert status == "ok"

    async def test_selfcheck_fail_when_predictor_raises(self) -> None:
        """A recognition predictor that raises yields ``"fail"``."""
        boom = RuntimeError("cuda broken")

        ocr_client = SuryaOCRClient()
        ocr_client._recognition_predictor = MagicMock(side_effect=boom)
        ocr_client._detection_predictor = MagicMock()

        status = await ocr_client.selfcheck()
        assert status == "fail"
|
|
|
|
|
|
def _write_tiny_png(path: Path) -> None:
    """Save a 2x2 white RGB image to *path* as a PNG that PIL can open."""
    from PIL import Image

    blank = Image.new("RGB", (2, 2), color="white")
    blank.save(path, format="PNG")
|
|
|
|
|
|
@pytest.mark.parametrize("unused", [None])  # keep pytest happy if file ever runs alone
def test_module_imports(unused: None) -> None:
    """Smoke check: the ``SuryaOCRClient`` name imported by this module is bound."""
    client_cls = SuryaOCRClient
    assert client_cls is not None
|