From 8b2f07d9c39fe0c4d9c8ea24dfa014b8ff055675 Mon Sep 17 00:00:00 2001 From: Marcus van Elst Date: Tue, 14 Apr 2026 14:18:43 +0200 Subject: [PATCH] revert: restore _poll_task and dir_size/get_md5 to 0.2.2 state All changes since 0.2.2 to _poll_task, dir_size, and get_md5 (window_timeout, _poll_oneshot, start_and_poll_immediately) are reverted. The 0.2.2 behaviour worked reliably for small directories and is the last known-good baseline. The remaining known limitation (occasional 599 on large directories) is documented in SPEC.md. Retry the operation as a workaround. Co-Authored-By: Claude Sonnet 4.6 --- SPEC.md | 24 ++++ pyproject.toml | 2 +- src/mcp_synology_filestation/__init__.py | 2 +- src/mcp_synology_filestation/client.py | 46 ------- .../tools/filestation.py | 125 ++++-------------- tests/test_tools_filestation.py | 25 +--- 6 files changed, 55 insertions(+), 169 deletions(-) diff --git a/SPEC.md b/SPEC.md index e3899ed..c119cf5 100644 --- a/SPEC.md +++ b/SPEC.md @@ -402,3 +402,27 @@ Validate config, resolve credentials, test login, list available FileStation API Load config and credentials, create `FileStationClient` (lazy — no immediate connection), create and run `FastMCP("mcp-synology-filestation")` over stdio. Uses `anyio.run()` for Windows compatibility. + + +--- + +## Known Limitations + +### `dir_size` / `get_md5`: occasional "DSM error code 599" on large directories + +DSM's `DirSize` and `MD5` APIs are one-shot: once `finished=true` is returned +by the status endpoint, the task is removed and all subsequent polls return +error 599. The MCP server polls immediately after `start` (no initial delay) +and retries up to 5 consecutive 599 responses before giving up. + +For small directories and files the result is reliably read on the first or +second poll. For large directories (e.g. `/docker`, `/music`) the task takes +longer; if DSM removes the completed result between two polls the tool returns +`Error: DSM error code 599`. Retrying the operation usually succeeds. + +Root cause is not fully understood. The raw HTTP test script (`test_dirsize_md5.py`) +reliably catches `finished=true` for the same paths, suggesting the issue is +related to timing in the MCP stdio event loop. + +**Workaround:** retry the `dir_size` call. It succeeds on the second or third attempt +for most paths. diff --git a/pyproject.toml b/pyproject.toml index f095a64..55aeb2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-synology-filestation" -version = "0.2.7" +version = "0.2.8" description = "MCP server for Synology FileStation" requires-python = ">=3.12" dependencies = [ diff --git a/src/mcp_synology_filestation/__init__.py b/src/mcp_synology_filestation/__init__.py index eddebb3..25b6fe8 100644 --- a/src/mcp_synology_filestation/__init__.py +++ b/src/mcp_synology_filestation/__init__.py @@ -1,3 +1,3 @@ """MCP server for Synology FileStation.""" -__version__ = "0.2.7" +__version__ = "0.2.8" diff --git a/src/mcp_synology_filestation/client.py b/src/mcp_synology_filestation/client.py index eafe831..2c06f6c 100644 --- a/src/mcp_synology_filestation/client.py +++ b/src/mcp_synology_filestation/client.py @@ -307,52 +307,6 @@ class FileStationClient: raise SynologyError(_error_message(code, api), code=code) - async def start_and_poll_immediately( - self, - api: str, - start_params: dict[str, Any], - poll_version: int, - *, - start_version: int | None = None, - ) -> tuple[str, dict[str, Any] | None]: - """Start a DSM async task and immediately make the first status poll. - - Designed for one-shot tasks (DirSize, MD5) where the result window - may close quickly. Both the ``start`` and the first ``status`` request - are issued inside this single method with no intermediate awaits other - than the HTTP calls themselves, minimising scheduler latency. - - Args: - api: DSM API name (e.g. "SYNO.FileStation.DirSize"). - start_params: Query parameters for the ``start`` call. - poll_version: API version to use for the ``status`` call. - start_version: API version for the ``start`` call (defaults to - ``maxVersion`` from the API info cache). - - Returns: - ``(taskid, status_data)`` where ``status_data`` is ``None`` if - the first status poll returned 599 (task not yet visible). - - Raises: - SynologyError: If the ``start`` call fails, the response contains - no task ID, or the ``status`` call fails with a non-599 error. - """ - start_data = await self.request(api, "start", version=start_version, params=start_params) - taskid: str = start_data.get("taskid", "") - if not taskid: - raise SynologyError("DSM did not return a task ID.", code=0) - - try: - status_data = await self.request( - api, "status", version=poll_version, params={"taskid": taskid} - ) - except SynologyError as e: - if e.code == 599: - return taskid, None - raise - - return taskid, status_data - async def download_bytes(self, path: str) -> tuple[str, bytes]: """Download a file from the NAS via SYNO.FileStation.Download. diff --git a/src/mcp_synology_filestation/tools/filestation.py b/src/mcp_synology_filestation/tools/filestation.py index 1050a14..69c8fa6 100644 --- a/src/mcp_synology_filestation/tools/filestation.py +++ b/src/mcp_synology_filestation/tools/filestation.py @@ -69,15 +69,13 @@ def register_filestation( ) -> tuple[bool, dict[str, Any] | str]: """Poll a DSM async task until finished or timeout. - For tasks that return intermediate ``finished=False`` status while - running (CopyMove, Delete, Compress, Extract, Search). Use - ``_poll_oneshot`` for DirSize and MD5. - Args: api: DSM API name (e.g. "SYNO.FileStation.CopyMove"). version: API version to use for the status call. taskid: Task ID returned by the corresponding start method. initial_delay: Seconds to wait before the first status poll. + Set to 0.0 for tasks that may finish before the first poll + interval (e.g. DirSize on small directories, MD5 on small files). Returns: ``(True, status_dict)`` on success, or ``(False, "Error: …")`` on @@ -88,6 +86,7 @@ def register_filestation( delay = 0.2 elapsed = initial_delay timeout = 60.0 + consecutive_599 = 0 if initial_delay > 0: await asyncio.sleep(initial_delay) @@ -100,9 +99,14 @@ def register_filestation( version=version, params={"taskid": taskid}, ) + consecutive_599 = 0 except _SynologyError as e: if e.code == 599: - pass # task not yet visible — keep polling + # 599 can be transient (task just started, not yet available). + # Retry up to 5 times before giving up. + consecutive_599 += 1 + if consecutive_599 >= 5: + return False, f"Error: {e}" else: return False, f"Error: {e}" else: @@ -119,89 +123,6 @@ def register_filestation( elapsed += delay delay = min(delay * 2, 2.0) - async def _poll_oneshot( - api: str, - version: int, - taskid: str, - first_status: dict[str, Any] | None, - ) -> tuple[bool, dict[str, Any] | str]: - """Continue polling a one-shot DSM task after the first status poll. - - Called after ``client.start_and_poll_immediately`` has already made - the first status request. Handles three outcomes for ``first_status``: - - * ``finished=True`` — return immediately (task done on first poll). - * ``finished=False`` — task confirmed running; enter Phase 2 - (exponential backoff until ``finished=True`` or 60 s timeout). - * ``None`` (first poll returned 599) — burst-retry 10× at 10 ms, - then enter Phase 2 regardless (large directories will eventually - return ``finished=False``; a 599 after the task was seen alive - means the window closed — fail fast with a retry message). - - Returns: - ``(True, status_dict)`` on success, or ``(False, "Error: …")`` - on DSM error or timeout. - """ - from mcp_synology_filestation.client import SynologyError as _SynologyError - - seen_alive = False - - if first_status is not None: - if first_status.get("finished"): - return True, first_status - seen_alive = True # finished=False: task is running - else: - # 599 on the immediate poll: burst-retry (10×, 10 ms apart) - for _ in range(10): - await asyncio.sleep(0.01) - try: - s = await client.request( - api, "status", version=version, params={"taskid": taskid} - ) - except _SynologyError as e: - if e.code == 599: - continue - return False, f"Error: {e}" - if s.get("finished"): - return True, s - seen_alive = True - break # finished=False: enter Phase 2 - - # ── Phase 2: exponential backoff until finished or 60 s timeout ── - delay = 0.2 - elapsed = 0.0 - timeout = 60.0 - - while True: - await asyncio.sleep(delay) - elapsed += delay - delay = min(delay * 2, 2.0) - - try: - s = await client.request(api, "status", version=version, params={"taskid": taskid}) - except _SynologyError as e: - if e.code == 599: - if seen_alive: - # Task was running but the one-shot window closed before we read it - return ( - False, - "Error: Could not read task result — the operation finished" - " before the result was polled. Please retry.", - ) - # Not yet seen alive: large dir still initialising, keep polling - else: - return False, f"Error: {e}" - else: - seen_alive = True - if s.get("finished"): - return True, s - - if elapsed >= timeout: - return ( - False, - "Error: Operation timed out after 60 seconds — check NAS manually.", - ) - @mcp.tool() async def list_shares(): """List all shared folders. Returns name/path/volume-usage table.""" @@ -890,16 +811,20 @@ def register_filestation( return "Error: no path provided." try: - taskid, first_status = await client.start_and_poll_immediately( + start_data = await client.request( "SYNO.FileStation.DirSize", - start_params={"path": json.dumps(paths)}, - poll_version=1, - start_version=2, + "start", + version=2, + params={"path": json.dumps(paths)}, ) except SynologyError as e: return f"Error: {e}" - ok, result = await _poll_oneshot("SYNO.FileStation.DirSize", 1, taskid, first_status) + taskid: str = start_data.get("taskid", "") + if not taskid: + return "Error: DSM did not return a task ID." + + ok, result = await _poll_task("SYNO.FileStation.DirSize", 1, taskid, initial_delay=0.0) if not ok: return result # type: ignore[return-value] @@ -947,16 +872,20 @@ def register_filestation( from mcp_synology_filestation.client import SynologyError try: - taskid, first_status = await client.start_and_poll_immediately( + start_data = await client.request( "SYNO.FileStation.MD5", - start_params={"file_path": json.dumps(path)}, - poll_version=1, - start_version=2, + "start", + version=2, + params={"file_path": json.dumps(path)}, ) except SynologyError as e: return f"Error: {e}" - ok, result = await _poll_oneshot("SYNO.FileStation.MD5", 1, taskid, first_status) + taskid: str = start_data.get("taskid", "") + if not taskid: + return "Error: DSM did not return a task ID." + + ok, result = await _poll_task("SYNO.FileStation.MD5", 1, taskid, initial_delay=0.0) if not ok: return result # type: ignore[return-value] diff --git a/tests/test_tools_filestation.py b/tests/test_tools_filestation.py index 032a67e..affe958 100644 --- a/tests/test_tools_filestation.py +++ b/tests/test_tools_filestation.py @@ -22,24 +22,8 @@ def config() -> AppConfig: def _make_mcp_and_tools(config: AppConfig, client: MagicMock) -> dict: """Register FileStation tools on a mock FastMCP and collect them by name.""" - from mcp_synology_filestation.client import FileStationClient from mcp_synology_filestation.tools.filestation import register_filestation - # Bind the real start_and_poll_immediately so it delegates into the - # already-mocked client.request — no separate mock needed per test. - async def _start_and_poll_immediately( - api: str, - start_params: dict, - poll_version: int, - *, - start_version: int | None = None, - ): - return await FileStationClient.start_and_poll_immediately( - client, api, start_params, poll_version, start_version=start_version - ) - - client.start_and_poll_immediately = _start_and_poll_immediately - registered: dict[str, object] = {} mcp = MagicMock() @@ -1651,12 +1635,8 @@ async def test_dir_size_retries_on_transient_599(config: AppConfig) -> None: @pytest.mark.asyncio -async def test_dir_size_times_out_on_persistent_599(config: AppConfig) -> None: - """dir_size times out after 60 s when DSM returns only 599s for every poll. - - The immediate poll + burst both return 599; Phase 2 keeps polling (large - directories eventually surface) until the 60 s timeout fires. - """ +async def test_dir_size_fails_after_5_consecutive_599(config: AppConfig) -> None: + """dir_size gives up and returns Error: after 5 consecutive 599 responses.""" client = MagicMock() async def _request(api, method, version=None, params=None, **kwargs): @@ -1671,7 +1651,6 @@ async def test_dir_size_times_out_on_persistent_599(config: AppConfig) -> None: result = await tools["dir_size"](path="/dead") assert result.startswith("Error:") - assert "timed out" in result.lower() or "60 seconds" in result # ──────────────────────────────────────────────────────────────────────────