diff --git a/pyproject.toml b/pyproject.toml
index 3308a1a..04234db 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "mcp-synology-filestation"
-version = "0.3.6"
+version = "0.4.0"
 description = "MCP server for Synology FileStation"
 requires-python = ">=3.12"
 dependencies = [
@@ -10,6 +10,7 @@ dependencies = [
     "keyring>=25.0.0",
     "click>=8.1.0",
     "rich>=13.0.0",
+    "pypdf>=4.0.0",
 ]
 
 [project.optional-dependencies]
diff --git a/src/mcp_synology_filestation/__init__.py b/src/mcp_synology_filestation/__init__.py
index c5ec84c..147fe50 100644
--- a/src/mcp_synology_filestation/__init__.py
+++ b/src/mcp_synology_filestation/__init__.py
@@ -1,3 +1,3 @@
 """MCP server for Synology FileStation."""
 
-__version__ = "0.3.6"
+__version__ = "0.4.0"
diff --git a/src/mcp_synology_filestation/tools/filestation.py b/src/mcp_synology_filestation/tools/filestation.py
index d972028..ba847e6 100644
--- a/src/mcp_synology_filestation/tools/filestation.py
+++ b/src/mcp_synology_filestation/tools/filestation.py
@@ -1319,6 +1319,123 @@ def register_filestation(
 
         return "\n".join(lines)
 
+    @mcp.tool()
+    async def read_text(path: str, max_chars: int = 50_000, page: int = 0):
+        """Extract readable text from a PDF or plain-text file (max 10 MB).
+
+        Args:
+            path: File path on the NAS. ``.pdf`` files are parsed with pypdf; files
+                ending in .txt, .md, .markdown, .csv, .log, .yaml, .yml, .json, .xml,
+                .html or .htm are decoded as UTF-8 (undecodable bytes are replaced).
+            max_chars: Truncate the extracted text to this many characters; zero or
+                a negative value disables truncation.
+            page: PDF only. 0 returns all pages; N >= 1 returns only page N.
+
+        Returns:
+            A ``[name — N chars]`` header line followed by the text, or a message
+            starting with ``Error:`` when the request cannot be served.
+        """
+        import io
+
+        from mcp_synology_filestation.client import SynologyError
+
+        max_read_bytes = 10 * 1024 * 1024
+
+        # Kept as a tuple so it can be passed straight to str.endswith().
+        plain_text_exts = (
+            ".txt",
+            ".md",
+            ".markdown",
+            ".csv",
+            ".log",
+            ".yaml",
+            ".yml",
+            ".json",
+            ".xml",
+            ".html",
+            ".htm",
+        )
+        no_text_error = (
+            "Error: No extractable text found. The PDF may be image-only"
+            " (scanned without OCR layer)."
+        )
+
+        path_lower = path.lower()
+        if path_lower.endswith(".pdf"):
+            file_type = "pdf"
+        elif path_lower.endswith(plain_text_exts):
+            file_type = "text"
+        else:
+            return (
+                "Error: Unsupported file type. Supported: PDF, TXT, MD, CSV, JSON, YAML, XML, HTML"
+                " and other plain text formats."
+            )
+
+        try:
+            _filename, content = await client.download_bytes(path)
+        except SynologyError as e:
+            return f"Error: {e}"
+
+        # The limit is checked after the download completes, so it bounds how much
+        # data is parsed and returned, not how much is transferred.
+        size = len(content)
+        if size > max_read_bytes:
+            return f"Error: File too large ({size / 1024 / 1024:.1f} MB). Maximum is 10 MB."
+
+        total_pages = 0
+
+        if file_type == "pdf":
+            try:
+                from pypdf import PdfReader
+
+                reader = PdfReader(io.BytesIO(content))
+                total_pages = len(reader.pages)
+
+                if page == 0:
+                    page_texts = [pg.extract_text() or "" for pg in reader.pages]
+                    if not any(t.strip() for t in page_texts):
+                        return no_text_error
+                    # The first page is emitted bare; later pages get a separator.
+                    text = "".join(
+                        pg_text if i == 0 else f"\n\n--- Page {i + 1} ---\n\n{pg_text}"
+                        for i, pg_text in enumerate(page_texts)
+                    )
+                else:
+                    # Negative numbers are rejected as well: reader.pages[page - 1]
+                    # would otherwise receive a negative index and return the wrong page.
+                    if not 1 <= page <= total_pages:
+                        return (
+                            f"Error: Page {page} does not exist — "
+                            f"this PDF has {total_pages} page(s)."
+                        )
+                    text = reader.pages[page - 1].extract_text() or ""
+                    if not text.strip():
+                        return no_text_error
+
+            except Exception as exc:
+                # Boundary handler: any failure while importing or running the PDF
+                # parser is reported to the caller as an error string.
+                return f"Error: Failed to parse PDF: {exc}"
+        else:
+            text = content.decode("utf-8", errors="replace")
+
+        full_len = len(text)
+
+        if max_chars > 0 and full_len > max_chars:
+            text = (
+                text[:max_chars]
+                + f"\n\n[Truncated: {full_len} total chars, showing first {max_chars}."
+                + " Use max_chars parameter to adjust.]"
+            )
+
+        display_name = path.rsplit("/", 1)[-1]
+        if file_type == "pdf" and page != 0:
+            header = f"[{display_name} — Page {page}/{total_pages} — {full_len} chars]"
+        else:
+            header = f"[{display_name} — {full_len} chars]"
+
+        return f"{header}\n{text}"
+
     @mcp.tool()
     async def list_snapshots(share_path: str, offset: int = 0, limit: int = 100):
         """List Btrfs snapshots for a shared folder (requires Btrfs volume)."""
diff --git a/tests/test_read_text.py b/tests/test_read_text.py
new file mode 100644
index 0000000..4af742d
--- /dev/null
+++ b/tests/test_read_text.py
@@ -0,0 +1,245 @@
+"""Tests for read_text tool."""
+
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from mcp_synology_filestation.client import SynologyError
+from mcp_synology_filestation.config import AppConfig, ConnectionConfig
+
+
+@pytest.fixture()
+def config() -> AppConfig:
+    return AppConfig(
+        schema_version=1,
+        connection=ConnectionConfig(host="nas.example.com"),
+    )
+
+
+def _make_tools(config: AppConfig, client: MagicMock) -> dict:
+    from mcp_synology_filestation.tools.filestation import register_filestation
+
+    registered: dict[str, object] = {}
+    mcp = MagicMock()
+
+    def tool_decorator():
+        def decorator(fn):
+            registered[fn.__name__] = fn
+            return fn
+
+        return decorator
+
+    mcp.tool = tool_decorator
+    register_filestation(mcp, config, client)
+    return registered
+
+
+def _mock_pdf_reader(pages_text: list[str]) -> MagicMock:
+    """Build a mock PdfReader whose .pages list returns the given text per page."""
+    mock_pages = []
+    for text in pages_text:
+        page = MagicMock()
+        page.extract_text.return_value = text
+        mock_pages.append(page)
+    reader = MagicMock()
+    reader.pages = mock_pages
+    return reader
+
+
+# ── TXT / plain text ──────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_read_text_txt_success(config: AppConfig) -> None:
+    """TXT file content is decoded and returned with header."""
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("readme.txt", b"Hello, world!"))
+
+    tools = _make_tools(config, client)
+    result = await tools["read_text"]("/docs/readme.txt")
+
+    assert "readme.txt" in result
+    assert "Hello, world!" in result
+    assert "13 chars" in result  # len("Hello, world!") == 13
+
+
+@pytest.mark.asyncio
+async def test_read_text_md_success(config: AppConfig) -> None:
+    """Markdown file is treated as plain text."""
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("notes.md", b"# Title\n\nBody text."))
+
+    tools = _make_tools(config, client)
+    result = await tools["read_text"]("/docs/notes.md")
+
+    assert "notes.md" in result
+    assert "# Title" in result
+    assert "Body text." in result
+
+
+# ── PDF extraction ────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_read_text_pdf_all_pages(config: AppConfig) -> None:
+    """PDF all-pages mode joins pages with separator."""
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("report.pdf", b"%PDF-1.4 stub"))
+
+    mock_reader = _mock_pdf_reader(["First page text.", "Second page text."])
+    tools = _make_tools(config, client)
+
+    with patch("pypdf.PdfReader", return_value=mock_reader):
+        result = await tools["read_text"]("/docs/report.pdf")
+
+    assert "report.pdf" in result
+    assert "First page text." in result
+    assert "Second page text." in result
+    assert "--- Page 2 ---" in result
+
+
+@pytest.mark.asyncio
+async def test_read_text_pdf_single_page(config: AppConfig) -> None:
+    """PDF single-page mode returns only the requested page."""
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("doc.pdf", b"%PDF-1.4 stub"))
+
+    mock_reader = _mock_pdf_reader(["Page one.", "Page two.", "Page three."])
+    tools = _make_tools(config, client)
+
+    with patch("pypdf.PdfReader", return_value=mock_reader):
+        result = await tools["read_text"]("/docs/doc.pdf", page=2)
+
+    assert "Page 2/3" in result
+    assert "Page two." in result
+    assert "Page one." not in result
+    assert "Page three." not in result
+
+
+@pytest.mark.asyncio
+async def test_read_text_pdf_page_out_of_range(config: AppConfig) -> None:
+    """Requesting a page beyond the PDF page count returns an error."""
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("doc.pdf", b"%PDF-1.4 stub"))
+
+    mock_reader = _mock_pdf_reader(["Only page."])
+    tools = _make_tools(config, client)
+
+    with patch("pypdf.PdfReader", return_value=mock_reader):
+        result = await tools["read_text"]("/docs/doc.pdf", page=5)
+
+    assert result.startswith("Error:")
+    assert "5" in result
+    assert "1" in result  # total pages
+
+
+@pytest.mark.asyncio
+async def test_read_text_pdf_negative_page(config: AppConfig) -> None:
+    """A negative page number is rejected instead of indexing from the end."""
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("doc.pdf", b"%PDF-1.4 stub"))
+
+    mock_reader = _mock_pdf_reader(["Page one.", "Page two.", "Page three."])
+    tools = _make_tools(config, client)
+
+    with patch("pypdf.PdfReader", return_value=mock_reader):
+        result = await tools["read_text"]("/docs/doc.pdf", page=-1)
+
+    assert result.startswith("Error:")
+    assert "does not exist" in result
+    assert "Page two." not in result
+
+
+@pytest.mark.asyncio
+async def test_read_text_pdf_image_only(config: AppConfig) -> None:
+    """PDF with no extractable text returns image-only error."""
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("scan.pdf", b"%PDF-1.4 stub"))
+
+    mock_reader = _mock_pdf_reader(["", ""])  # no text on any page
+    tools = _make_tools(config, client)
+
+    with patch("pypdf.PdfReader", return_value=mock_reader):
+        result = await tools["read_text"]("/docs/scan.pdf")
+
+    assert result.startswith("Error:")
+    assert "image-only" in result.lower() or "No extractable text" in result
+
+
+# ── max_chars truncation ──────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_read_text_max_chars_truncation(config: AppConfig) -> None:
+    """Text exceeding max_chars is truncated with a hint."""
+    long_text = "A" * 200
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("big.txt", long_text.encode()))
+
+    tools = _make_tools(config, client)
+    result = await tools["read_text"]("/data/big.txt", max_chars=50)
+
+    assert "Truncated" in result
+    assert "200 total chars" in result
+    assert "showing first 50" in result
+    # The returned content before the truncation note must be exactly 50 'A's
+    assert "A" * 50 in result
+    assert "A" * 51 not in result
+
+
+@pytest.mark.asyncio
+async def test_read_text_max_chars_zero_no_limit(config: AppConfig) -> None:
+    """max_chars=0 disables truncation."""
+    long_text = "B" * 100_000
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("huge.txt", long_text.encode()))
+
+    tools = _make_tools(config, client)
+    result = await tools["read_text"]("/data/huge.txt", max_chars=0)
+
+    assert "Truncated" not in result
+    assert "B" * 100 in result  # spot-check some content
+
+
+# ── error cases ───────────────────────────────────────────────────────────
+
+
+@pytest.mark.asyncio
+async def test_read_text_unsupported_type(config: AppConfig) -> None:
+    """Unknown file extension returns an unsupported-type error."""
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("binary.exe", b"\x00\x01\x02"))
+
+    tools = _make_tools(config, client)
+    result = await tools["read_text"]("/bin/binary.exe")
+
+    assert result.startswith("Error:")
+    assert "Unsupported file type" in result
+
+
+@pytest.mark.asyncio
+async def test_read_text_file_too_large(config: AppConfig) -> None:
+    """Files exceeding 10 MB are rejected with a size error once downloaded."""
+    oversized = b"x" * (10 * 1024 * 1024 + 1)
+    client = MagicMock()
+    client.download_bytes = AsyncMock(return_value=("big.txt", oversized))
+
+    tools = _make_tools(config, client)
+    result = await tools["read_text"]("/data/big.txt")
+
+    assert result.startswith("Error:")
+    assert "10 MB" in result
+
+
+@pytest.mark.asyncio
+async def test_read_text_dsm_error(config: AppConfig) -> None:
+    """DSM errors from download are surfaced as Error: messages."""
+    client = MagicMock()
+    client.download_bytes = AsyncMock(side_effect=SynologyError(1800, "File not found"))
+
+    tools = _make_tools(config, client)
+    result = await tools["read_text"]("/missing/file.txt")
+
+    assert result.startswith("Error:")