"""Tests for :func:`ix.ingestion.fetch.fetch_file` (spec ยง6.1).""" from __future__ import annotations from pathlib import Path import httpx import pytest from pytest_httpx import HTTPXMock from ix.contracts import FileRef from ix.errors import IXErrorCode, IXException from ix.ingestion import FetchConfig, fetch_file @pytest.fixture def cfg() -> FetchConfig: return FetchConfig( connect_timeout_s=1.0, read_timeout_s=2.0, max_bytes=1024 * 1024, ) class TestSuccessPath: async def test_downloads_with_auth_header_and_writes_to_tmp( self, tmp_path: Path, cfg: FetchConfig, httpx_mock: HTTPXMock ) -> None: url = "https://paperless.local/doc/123/download" httpx_mock.add_response( url=url, method="GET", status_code=200, content=b"%PDF-1.4 body", headers={"content-type": "application/pdf"}, ) file_ref = FileRef( url=url, headers={"Authorization": "Token abc"}, ) path = await fetch_file(file_ref, tmp_dir=tmp_path, cfg=cfg) assert path.exists() assert path.read_bytes() == b"%PDF-1.4 body" # Confirm header went out. reqs = httpx_mock.get_requests() assert len(reqs) == 1 assert reqs[0].headers["Authorization"] == "Token abc" class TestNon2xx: async def test_404_raises_IX_000_007( self, tmp_path: Path, cfg: FetchConfig, httpx_mock: HTTPXMock ) -> None: url = "https://host.local/missing.pdf" httpx_mock.add_response(url=url, status_code=404, content=b"") file_ref = FileRef(url=url) with pytest.raises(IXException) as ei: await fetch_file(file_ref, tmp_dir=tmp_path, cfg=cfg) assert ei.value.code is IXErrorCode.IX_000_007 assert "404" in (ei.value.detail or "") async def test_500_raises_IX_000_007( self, tmp_path: Path, cfg: FetchConfig, httpx_mock: HTTPXMock ) -> None: url = "https://host.local/boom.pdf" httpx_mock.add_response(url=url, status_code=500, content=b"oops") file_ref = FileRef(url=url) with pytest.raises(IXException) as ei: await fetch_file(file_ref, tmp_dir=tmp_path, cfg=cfg) assert ei.value.code is IXErrorCode.IX_000_007 class TestTimeout: async def test_timeout_raises_IX_000_007( self, tmp_path: Path, cfg: FetchConfig, httpx_mock: HTTPXMock ) -> None: url = "https://host.local/slow.pdf" httpx_mock.add_exception(httpx.ReadTimeout("slow"), url=url) file_ref = FileRef(url=url) with pytest.raises(IXException) as ei: await fetch_file(file_ref, tmp_dir=tmp_path, cfg=cfg) assert ei.value.code is IXErrorCode.IX_000_007 class TestOversize: async def test_oversize_raises_IX_000_007( self, tmp_path: Path, httpx_mock: HTTPXMock ) -> None: url = "https://host.local/big.pdf" cfg = FetchConfig( connect_timeout_s=1.0, read_timeout_s=2.0, max_bytes=100, ) # 500 bytes of payload; cap is 100. httpx_mock.add_response(url=url, status_code=200, content=b"x" * 500) file_ref = FileRef(url=url) with pytest.raises(IXException) as ei: await fetch_file(file_ref, tmp_dir=tmp_path, cfg=cfg) assert ei.value.code is IXErrorCode.IX_000_007 async def test_per_file_max_bytes_override( self, tmp_path: Path, httpx_mock: HTTPXMock ) -> None: url = "https://host.local/mid.pdf" cfg = FetchConfig( connect_timeout_s=1.0, read_timeout_s=2.0, max_bytes=1_000_000, ) # file_ref sets a tighter cap. httpx_mock.add_response(url=url, status_code=200, content=b"x" * 500) file_ref = FileRef(url=url, max_bytes=100) with pytest.raises(IXException) as ei: await fetch_file(file_ref, tmp_dir=tmp_path, cfg=cfg) assert ei.value.code is IXErrorCode.IX_000_007 class TestFileUrl: async def test_file_scheme_reads_local( self, tmp_path: Path, cfg: FetchConfig ) -> None: src = tmp_path / "in.pdf" src.write_bytes(b"%PDF-1.4\nfile scheme content") file_ref = FileRef(url=src.as_uri()) dst = await fetch_file(file_ref, tmp_dir=tmp_path / "out", cfg=cfg) assert dst.exists() assert dst.read_bytes() == b"%PDF-1.4\nfile scheme content" async def test_file_scheme_missing_raises( self, tmp_path: Path, cfg: FetchConfig ) -> None: missing = tmp_path / "nope.pdf" file_ref = FileRef(url=missing.as_uri()) with pytest.raises(IXException) as ei: await fetch_file(file_ref, tmp_dir=tmp_path, cfg=cfg) assert ei.value.code is IXErrorCode.IX_000_007