Files
marcus 65cb5a44c7 feat: add read_text tool — extract text from PDF/TXT/MD (v0.4.0)
Adds read_text tool with pypdf integration for PDF text extraction,
plain-text decode for TXT/MD/CSV/JSON/etc, page-filter support,
max_chars truncation, and 11 mock-based tests.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 17:09:32 +02:00

228 lines
7.8 KiB
Python

"""Tests for read_text tool."""
from __future__ import annotations
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from mcp_synology_filestation.client import SynologyError
from mcp_synology_filestation.config import AppConfig, ConnectionConfig
@pytest.fixture()
def config() -> AppConfig:
    """Provide a minimal AppConfig whose connection targets a placeholder host."""
    nas_connection = ConnectionConfig(host="nas.example.com")
    return AppConfig(schema_version=1, connection=nas_connection)
def _make_tools(config: AppConfig, client: MagicMock) -> dict:
    """Register the File Station tools on a stub MCP server and return them.

    ``register_filestation`` is invoked with a ``MagicMock`` server whose
    ``tool`` attribute is replaced by a capturing decorator factory, so every
    function it decorates is collected under the function's ``__name__``.

    Args:
        config: Application configuration forwarded to the registration call.
        client: Mocked client forwarded to the registration call.

    Returns:
        Mapping of function name to the undecorated tool callable.
    """
    # Local import keeps the dependency scoped to this helper.
    from mcp_synology_filestation.tools.filestation import register_filestation

    registered: dict[str, object] = {}
    mcp = MagicMock()

    def tool_decorator(*_args: object, **_kwargs: object):
        # Decorator options (e.g. ``name=`` / ``description=``) are accepted
        # and ignored, so the helper keeps working if the registration code
        # passes them to ``mcp.tool(...)``. Tools stay keyed by ``__name__``.
        def decorator(fn):
            registered[fn.__name__] = fn
            return fn

        return decorator

    mcp.tool = tool_decorator
    register_filestation(mcp, config, client)
    return registered
def _mock_pdf_reader(pages_text: list[str]) -> MagicMock:
"""Build a mock PdfReader whose .pages list returns the given text per page."""
mock_pages = []
for text in pages_text:
page = MagicMock()
page.extract_text.return_value = text
mock_pages.append(page)
reader = MagicMock()
reader.pages = mock_pages
return reader
# ── TXT / plain text ──────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_read_text_txt_success(config: AppConfig) -> None:
    """A .txt download is decoded and reported with its name and length."""
    nas = MagicMock()
    nas.download_bytes = AsyncMock(return_value=("readme.txt", b"Hello, world!"))
    read_text = _make_tools(config, nas)["read_text"]

    output = await read_text("/docs/readme.txt")

    assert "readme.txt" in output
    assert "Hello, world!" in output
    # The payload "Hello, world!" is 13 characters long.
    assert "13 chars" in output
@pytest.mark.asyncio
async def test_read_text_md_success(config: AppConfig) -> None:
    """Markdown content is passed through as plain text."""
    nas = MagicMock()
    nas.download_bytes = AsyncMock(return_value=("notes.md", b"# Title\n\nBody text."))
    read_text = _make_tools(config, nas)["read_text"]

    output = await read_text("/docs/notes.md")

    # File name, heading and body must all survive.
    assert "notes.md" in output
    assert "# Title" in output
    assert "Body text." in output
# ── PDF extraction ────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_read_text_pdf_all_pages(config: AppConfig) -> None:
    """Without a page filter, text from every PDF page is returned with page markers."""
    fake_reader = _mock_pdf_reader(["First page text.", "Second page text."])
    nas = MagicMock()
    nas.download_bytes = AsyncMock(return_value=("report.pdf", b"%PDF-1.4 stub"))
    read_text = _make_tools(config, nas)["read_text"]

    # PdfReader is replaced by the fake reader for the duration of the call.
    with patch("pypdf.PdfReader", return_value=fake_reader):
        output = await read_text("/docs/report.pdf")

    assert "report.pdf" in output
    assert "First page text." in output
    assert "Second page text." in output
    assert "--- Page 2 ---" in output
@pytest.mark.asyncio
async def test_read_text_pdf_single_page(config: AppConfig) -> None:
    """With ``page=2`` only the second page's text is returned."""
    fake_reader = _mock_pdf_reader(["Page one.", "Page two.", "Page three."])
    nas = MagicMock()
    nas.download_bytes = AsyncMock(return_value=("doc.pdf", b"%PDF-1.4 stub"))
    read_text = _make_tools(config, nas)["read_text"]

    with patch("pypdf.PdfReader", return_value=fake_reader):
        output = await read_text("/docs/doc.pdf", page=2)

    # The requested page is present ...
    assert "Page 2/3" in output
    assert "Page two." in output
    # ... and its neighbours are not.
    assert "Page one." not in output
    assert "Page three." not in output
@pytest.mark.asyncio
async def test_read_text_pdf_page_out_of_range(config: AppConfig) -> None:
    """Asking for page 5 of a one-page PDF yields an error message."""
    fake_reader = _mock_pdf_reader(["Only page."])
    nas = MagicMock()
    nas.download_bytes = AsyncMock(return_value=("doc.pdf", b"%PDF-1.4 stub"))
    read_text = _make_tools(config, nas)["read_text"]

    with patch("pypdf.PdfReader", return_value=fake_reader):
        output = await read_text("/docs/doc.pdf", page=5)

    assert output.startswith("Error:")
    # The message mentions the requested page and the total page count.
    assert "5" in output
    assert "1" in output
@pytest.mark.asyncio
async def test_read_text_pdf_image_only(config: AppConfig) -> None:
    """A PDF whose pages all extract to empty text is reported as an error."""
    # Two pages, neither of which yields any text.
    fake_reader = _mock_pdf_reader(["", ""])
    nas = MagicMock()
    nas.download_bytes = AsyncMock(return_value=("scan.pdf", b"%PDF-1.4 stub"))
    read_text = _make_tools(config, nas)["read_text"]

    with patch("pypdf.PdfReader", return_value=fake_reader):
        output = await read_text("/docs/scan.pdf")

    assert output.startswith("Error:")
    mentions_image_only = "image-only" in output.lower()
    mentions_no_text = "No extractable text" in output
    assert mentions_image_only or mentions_no_text
# ── max_chars truncation ──────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_read_text_max_chars_truncation(config: AppConfig) -> None:
    """Text exceeding max_chars is cut to max_chars and followed by a hint.

    A 200-character payload is read with ``max_chars=50``; the result must
    carry the truncation note and contain a run of exactly 50 payload
    characters.
    """
    long_text = "A" * 200
    client = MagicMock()
    client.download_bytes = AsyncMock(return_value=("big.txt", long_text.encode()))
    tools = _make_tools(config, client)

    result = await tools["read_text"]("/data/big.txt", max_chars=50)

    assert "Truncated" in result
    assert "200 total chars" in result
    assert "showing first 50" in result
    # The returned content must be exactly 50 'A's: the 50-char prefix is
    # present, and no longer run is. The substring check alone would also
    # pass if the text were not truncated at all.
    assert "A" * 50 in result
    assert "A" * 51 not in result
@pytest.mark.asyncio
async def test_read_text_max_chars_zero_no_limit(config: AppConfig) -> None:
    """max_chars=0 disables truncation: the whole payload is returned."""
    long_text = "B" * 100_000
    client = MagicMock()
    client.download_bytes = AsyncMock(return_value=("huge.txt", long_text.encode()))
    tools = _make_tools(config, client)

    result = await tools["read_text"]("/data/huge.txt", max_chars=0)

    assert "Truncated" not in result
    # Check the full payload rather than a 100-char sample, so a silently
    # shortened result cannot slip through.
    assert long_text in result
# ── error cases ───────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_read_text_unsupported_type(config: AppConfig) -> None:
    """A file with an unrecognised extension is rejected with an error message."""
    nas = MagicMock()
    nas.download_bytes = AsyncMock(return_value=("binary.exe", b"\x00\x01\x02"))
    read_text = _make_tools(config, nas)["read_text"]

    output = await read_text("/bin/binary.exe")

    assert output.startswith("Error:")
    assert "Unsupported file type" in output
@pytest.mark.asyncio
async def test_read_text_file_too_large(config: AppConfig) -> None:
    """A payload one byte over 10 MB yields a size error instead of its content.

    The mocked download hands the full oversized payload to the tool, so this
    exercises the size check applied to the downloaded bytes.
    """
    oversized = b"x" * (10 * 1024 * 1024 + 1)
    client = MagicMock()
    client.download_bytes = AsyncMock(return_value=("big.txt", oversized))
    tools = _make_tools(config, client)

    result = await tools["read_text"]("/data/big.txt")

    assert result.startswith("Error:")
    assert "10 MB" in result
    # The error must replace the content, not be prepended to it.
    assert len(result) < len(oversized)
@pytest.mark.asyncio
async def test_read_text_dsm_error(config: AppConfig) -> None:
    """A SynologyError raised by the download is turned into an ``Error:`` message."""
    download_failure = SynologyError(1800, "File not found")
    nas = MagicMock()
    nas.download_bytes = AsyncMock(side_effect=download_failure)
    read_text = _make_tools(config, nas)["read_text"]

    output = await read_text("/missing/file.txt")

    assert output.startswith("Error:")