"""Tests for :class:`SuryaOCRClient` — hermetic, no model download. The real Surya predictors are patched out with :class:`unittest.mock.MagicMock` that return trivially-shaped line objects. The tests assert the client's translation layer — flattening polygons, mapping text_lines → ``Line``, preserving ``page_no``/``width``/``height`` per input page. """ from __future__ import annotations from pathlib import Path from types import SimpleNamespace from unittest.mock import MagicMock, patch import pytest from ix.contracts import Page from ix.ocr.surya_client import SuryaOCRClient from ix.segmentation import PageMetadata def _make_surya_line(text: str, polygon: list[list[float]]) -> SimpleNamespace: """Mimic ``surya.recognition.schema.TextLine`` duck-typing-style.""" return SimpleNamespace(text=text, polygon=polygon, confidence=0.95) def _make_surya_ocr_result(lines: list[SimpleNamespace]) -> SimpleNamespace: """Mimic ``surya.recognition.schema.OCRResult``.""" return SimpleNamespace(text_lines=lines, image_bbox=[0, 0, 100, 100]) class TestOCRBuildsOCRResultFromMockedPredictors: async def test_one_image_one_line_flatten_polygon(self, tmp_path: Path) -> None: img_path = tmp_path / "a.png" _write_tiny_png(img_path) mock_line = _make_surya_line( text="hello", polygon=[[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]], ) mock_predictor = MagicMock( return_value=[_make_surya_ocr_result([mock_line])] ) client = SuryaOCRClient() # Skip the real warm_up; inject the mock directly. client._recognition_predictor = mock_predictor client._detection_predictor = MagicMock() pages = [Page(page_no=1, width=100.0, height=50.0, lines=[])] result = await client.ocr( pages, files=[(img_path, "image/png")], page_metadata=[PageMetadata(file_index=0)], ) assert len(result.result.pages) == 1 out_page = result.result.pages[0] assert out_page.page_no == 1 assert out_page.width == 100.0 assert out_page.height == 50.0 assert len(out_page.lines) == 1 assert out_page.lines[0].text == "hello" assert out_page.lines[0].bounding_box == [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 ] async def test_multiple_pages_preserves_order(self, tmp_path: Path) -> None: img_a = tmp_path / "a.png" img_b = tmp_path / "b.png" _write_tiny_png(img_a) _write_tiny_png(img_b) mock_predictor = MagicMock( return_value=[ _make_surya_ocr_result( [_make_surya_line("a-line", [[0, 0], [1, 0], [1, 1], [0, 1]])] ), _make_surya_ocr_result( [_make_surya_line("b-line", [[0, 0], [1, 0], [1, 1], [0, 1]])] ), ] ) client = SuryaOCRClient() client._recognition_predictor = mock_predictor client._detection_predictor = MagicMock() pages = [ Page(page_no=1, width=10.0, height=20.0, lines=[]), Page(page_no=2, width=10.0, height=20.0, lines=[]), ] result = await client.ocr( pages, files=[(img_a, "image/png"), (img_b, "image/png")], page_metadata=[ PageMetadata(file_index=0), PageMetadata(file_index=1), ], ) assert [p.lines[0].text for p in result.result.pages] == ["a-line", "b-line"] async def test_lazy_warm_up_on_first_ocr(self, tmp_path: Path) -> None: img = tmp_path / "x.png" _write_tiny_png(img) client = SuryaOCRClient() # Use patch.object on the instance's warm_up so we don't need real # Surya module loading. with patch.object(client, "warm_up", autospec=True) as mocked_warm_up: # After warm_up is called, the predictors must be assigned. def fake_warm_up(self: SuryaOCRClient) -> None: self._recognition_predictor = MagicMock( return_value=[ _make_surya_ocr_result( [ _make_surya_line( "hi", [[0, 0], [1, 0], [1, 1], [0, 1]] ) ] ) ] ) self._detection_predictor = MagicMock() mocked_warm_up.side_effect = lambda: fake_warm_up(client) pages = [Page(page_no=1, width=10.0, height=10.0, lines=[])] await client.ocr( pages, files=[(img, "image/png")], page_metadata=[PageMetadata(file_index=0)], ) mocked_warm_up.assert_called_once() class TestSelfcheck: async def test_selfcheck_ok_with_mocked_predictors(self) -> None: client = SuryaOCRClient() client._recognition_predictor = MagicMock( return_value=[_make_surya_ocr_result([])] ) client._detection_predictor = MagicMock() assert await client.selfcheck() == "ok" async def test_selfcheck_fail_when_predictor_raises(self) -> None: client = SuryaOCRClient() client._recognition_predictor = MagicMock( side_effect=RuntimeError("cuda broken") ) client._detection_predictor = MagicMock() assert await client.selfcheck() == "fail" def _write_tiny_png(path: Path) -> None: """Write a 2x2 white PNG so PIL can open it.""" from PIL import Image Image.new("RGB", (2, 2), color="white").save(path, format="PNG") @pytest.mark.parametrize("unused", [None]) # keep pytest happy if file ever runs alone def test_module_imports(unused: None) -> None: assert SuryaOCRClient is not None