revert: restore _poll_task and dir_size/get_md5 to 0.2.2 state

All changes since 0.2.2 to _poll_task, dir_size, and get_md5 (window_timeout,
_poll_oneshot, start_and_poll_immediately) are reverted. The 0.2.2 behaviour
worked reliably for small directories and is the last known-good baseline.

The remaining known limitation (occasional 599 on large directories) is
documented in SPEC.md. Retry the operation as a workaround.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-14 14:18:43 +02:00
parent 62f8e41931
commit 8b2f07d9c3
6 changed files with 55 additions and 169 deletions
+24
View File
@@ -402,3 +402,27 @@ Validate config, resolve credentials, test login, list available FileStation API
Load config and credentials, create `FileStationClient` (lazy — no immediate connection), Load config and credentials, create `FileStationClient` (lazy — no immediate connection),
create and run `FastMCP("mcp-synology-filestation")` over stdio. Uses `anyio.run()` for create and run `FastMCP("mcp-synology-filestation")` over stdio. Uses `anyio.run()` for
Windows compatibility. Windows compatibility.
---
## Known Limitations
### `dir_size` / `get_md5`: occasional "DSM error code 599" on large directories
DSM's `DirSize` and `MD5` APIs are one-shot: once `finished=true` is returned
by the status endpoint, the task is removed and all subsequent polls return
error 599. The MCP server polls immediately after `start` (no initial delay)
and retries up to 5 consecutive 599 responses before giving up.
For small directories and files the result is reliably read on the first or
second poll. For large directories (e.g. `/docker`, `/music`) the task takes
longer; if DSM removes the completed result between two polls the tool returns
`Error: DSM error code 599`. Retrying the operation usually succeeds.
Root cause is not fully understood. The raw HTTP test script (`test_dirsize_md5.py`)
reliably catches `finished=true` for the same paths, suggesting the issue is
related to timing in the MCP stdio event loop.
**Workaround:** retry the `dir_size` call. It succeeds on the second or third attempt
for most paths.
+1 -1
View File
@@ -1,6 +1,6 @@
[project] [project]
name = "mcp-synology-filestation" name = "mcp-synology-filestation"
version = "0.2.7" version = "0.2.8"
description = "MCP server for Synology FileStation" description = "MCP server for Synology FileStation"
requires-python = ">=3.12" requires-python = ">=3.12"
dependencies = [ dependencies = [
+1 -1
View File
@@ -1,3 +1,3 @@
"""MCP server for Synology FileStation.""" """MCP server for Synology FileStation."""
__version__ = "0.2.7" __version__ = "0.2.8"
-46
View File
@@ -307,52 +307,6 @@ class FileStationClient:
raise SynologyError(_error_message(code, api), code=code) raise SynologyError(_error_message(code, api), code=code)
async def start_and_poll_immediately(
self,
api: str,
start_params: dict[str, Any],
poll_version: int,
*,
start_version: int | None = None,
) -> tuple[str, dict[str, Any] | None]:
"""Start a DSM async task and immediately make the first status poll.
Designed for one-shot tasks (DirSize, MD5) where the result window
may close quickly. Both the ``start`` and the first ``status`` request
are issued inside this single method with no intermediate awaits other
than the HTTP calls themselves, minimising scheduler latency.
Args:
api: DSM API name (e.g. "SYNO.FileStation.DirSize").
start_params: Query parameters for the ``start`` call.
poll_version: API version to use for the ``status`` call.
start_version: API version for the ``start`` call (defaults to
``maxVersion`` from the API info cache).
Returns:
``(taskid, status_data)`` where ``status_data`` is ``None`` if
the first status poll returned 599 (task not yet visible).
Raises:
SynologyError: If the ``start`` call fails, the response contains
no task ID, or the ``status`` call fails with a non-599 error.
"""
start_data = await self.request(api, "start", version=start_version, params=start_params)
taskid: str = start_data.get("taskid", "")
if not taskid:
raise SynologyError("DSM did not return a task ID.", code=0)
try:
status_data = await self.request(
api, "status", version=poll_version, params={"taskid": taskid}
)
except SynologyError as e:
if e.code == 599:
return taskid, None
raise
return taskid, status_data
async def download_bytes(self, path: str) -> tuple[str, bytes]: async def download_bytes(self, path: str) -> tuple[str, bytes]:
"""Download a file from the NAS via SYNO.FileStation.Download. """Download a file from the NAS via SYNO.FileStation.Download.
@@ -69,15 +69,13 @@ def register_filestation(
) -> tuple[bool, dict[str, Any] | str]: ) -> tuple[bool, dict[str, Any] | str]:
"""Poll a DSM async task until finished or timeout. """Poll a DSM async task until finished or timeout.
For tasks that return intermediate ``finished=False`` status while
running (CopyMove, Delete, Compress, Extract, Search). Use
``_poll_oneshot`` for DirSize and MD5.
Args: Args:
api: DSM API name (e.g. "SYNO.FileStation.CopyMove"). api: DSM API name (e.g. "SYNO.FileStation.CopyMove").
version: API version to use for the status call. version: API version to use for the status call.
taskid: Task ID returned by the corresponding start method. taskid: Task ID returned by the corresponding start method.
initial_delay: Seconds to wait before the first status poll. initial_delay: Seconds to wait before the first status poll.
Set to 0.0 for tasks that may finish before the first poll
interval (e.g. DirSize on small directories, MD5 on small files).
Returns: Returns:
``(True, status_dict)`` on success, or ``(False, "Error: …")`` on ``(True, status_dict)`` on success, or ``(False, "Error: …")`` on
@@ -88,6 +86,7 @@ def register_filestation(
delay = 0.2 delay = 0.2
elapsed = initial_delay elapsed = initial_delay
timeout = 60.0 timeout = 60.0
consecutive_599 = 0
if initial_delay > 0: if initial_delay > 0:
await asyncio.sleep(initial_delay) await asyncio.sleep(initial_delay)
@@ -100,9 +99,14 @@ def register_filestation(
version=version, version=version,
params={"taskid": taskid}, params={"taskid": taskid},
) )
consecutive_599 = 0
except _SynologyError as e: except _SynologyError as e:
if e.code == 599: if e.code == 599:
pass # task not yet visible — keep polling # 599 can be transient (task just started, not yet available).
# Retry up to 5 times before giving up.
consecutive_599 += 1
if consecutive_599 >= 5:
return False, f"Error: {e}"
else: else:
return False, f"Error: {e}" return False, f"Error: {e}"
else: else:
@@ -119,89 +123,6 @@ def register_filestation(
elapsed += delay elapsed += delay
delay = min(delay * 2, 2.0) delay = min(delay * 2, 2.0)
async def _poll_oneshot(
api: str,
version: int,
taskid: str,
first_status: dict[str, Any] | None,
) -> tuple[bool, dict[str, Any] | str]:
"""Continue polling a one-shot DSM task after the first status poll.
Called after ``client.start_and_poll_immediately`` has already made
the first status request. Handles three outcomes for ``first_status``:
* ``finished=True`` — return immediately (task done on first poll).
* ``finished=False`` — task confirmed running; enter Phase 2
(exponential backoff until ``finished=True`` or 60 s timeout).
* ``None`` (first poll returned 599) — burst-retry 10× at 10 ms,
then enter Phase 2 regardless (large directories will eventually
return ``finished=False``; a 599 after the task was seen alive
means the window closed — fail fast with a retry message).
Returns:
``(True, status_dict)`` on success, or ``(False, "Error: …")``
on DSM error or timeout.
"""
from mcp_synology_filestation.client import SynologyError as _SynologyError
seen_alive = False
if first_status is not None:
if first_status.get("finished"):
return True, first_status
seen_alive = True # finished=False: task is running
else:
# 599 on the immediate poll: burst-retry (10×, 10 ms apart)
for _ in range(10):
await asyncio.sleep(0.01)
try:
s = await client.request(
api, "status", version=version, params={"taskid": taskid}
)
except _SynologyError as e:
if e.code == 599:
continue
return False, f"Error: {e}"
if s.get("finished"):
return True, s
seen_alive = True
break # finished=False: enter Phase 2
# ── Phase 2: exponential backoff until finished or 60 s timeout ──
delay = 0.2
elapsed = 0.0
timeout = 60.0
while True:
await asyncio.sleep(delay)
elapsed += delay
delay = min(delay * 2, 2.0)
try:
s = await client.request(api, "status", version=version, params={"taskid": taskid})
except _SynologyError as e:
if e.code == 599:
if seen_alive:
# Task was running but the one-shot window closed before we read it
return (
False,
"Error: Could not read task result — the operation finished"
" before the result was polled. Please retry.",
)
# Not yet seen alive: large dir still initialising, keep polling
else:
return False, f"Error: {e}"
else:
seen_alive = True
if s.get("finished"):
return True, s
if elapsed >= timeout:
return (
False,
"Error: Operation timed out after 60 seconds — check NAS manually.",
)
@mcp.tool() @mcp.tool()
async def list_shares(): async def list_shares():
"""List all shared folders. Returns name/path/volume-usage table.""" """List all shared folders. Returns name/path/volume-usage table."""
@@ -890,16 +811,20 @@ def register_filestation(
return "Error: no path provided." return "Error: no path provided."
try: try:
taskid, first_status = await client.start_and_poll_immediately( start_data = await client.request(
"SYNO.FileStation.DirSize", "SYNO.FileStation.DirSize",
start_params={"path": json.dumps(paths)}, "start",
poll_version=1, version=2,
start_version=2, params={"path": json.dumps(paths)},
) )
except SynologyError as e: except SynologyError as e:
return f"Error: {e}" return f"Error: {e}"
ok, result = await _poll_oneshot("SYNO.FileStation.DirSize", 1, taskid, first_status) taskid: str = start_data.get("taskid", "")
if not taskid:
return "Error: DSM did not return a task ID."
ok, result = await _poll_task("SYNO.FileStation.DirSize", 1, taskid, initial_delay=0.0)
if not ok: if not ok:
return result # type: ignore[return-value] return result # type: ignore[return-value]
@@ -947,16 +872,20 @@ def register_filestation(
from mcp_synology_filestation.client import SynologyError from mcp_synology_filestation.client import SynologyError
try: try:
taskid, first_status = await client.start_and_poll_immediately( start_data = await client.request(
"SYNO.FileStation.MD5", "SYNO.FileStation.MD5",
start_params={"file_path": json.dumps(path)}, "start",
poll_version=1, version=2,
start_version=2, params={"file_path": json.dumps(path)},
) )
except SynologyError as e: except SynologyError as e:
return f"Error: {e}" return f"Error: {e}"
ok, result = await _poll_oneshot("SYNO.FileStation.MD5", 1, taskid, first_status) taskid: str = start_data.get("taskid", "")
if not taskid:
return "Error: DSM did not return a task ID."
ok, result = await _poll_task("SYNO.FileStation.MD5", 1, taskid, initial_delay=0.0)
if not ok: if not ok:
return result # type: ignore[return-value] return result # type: ignore[return-value]
+2 -23
View File
@@ -22,24 +22,8 @@ def config() -> AppConfig:
def _make_mcp_and_tools(config: AppConfig, client: MagicMock) -> dict: def _make_mcp_and_tools(config: AppConfig, client: MagicMock) -> dict:
"""Register FileStation tools on a mock FastMCP and collect them by name.""" """Register FileStation tools on a mock FastMCP and collect them by name."""
from mcp_synology_filestation.client import FileStationClient
from mcp_synology_filestation.tools.filestation import register_filestation from mcp_synology_filestation.tools.filestation import register_filestation
# Bind the real start_and_poll_immediately so it delegates into the
# already-mocked client.request — no separate mock needed per test.
async def _start_and_poll_immediately(
api: str,
start_params: dict,
poll_version: int,
*,
start_version: int | None = None,
):
return await FileStationClient.start_and_poll_immediately(
client, api, start_params, poll_version, start_version=start_version
)
client.start_and_poll_immediately = _start_and_poll_immediately
registered: dict[str, object] = {} registered: dict[str, object] = {}
mcp = MagicMock() mcp = MagicMock()
@@ -1651,12 +1635,8 @@ async def test_dir_size_retries_on_transient_599(config: AppConfig) -> None:
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_dir_size_times_out_on_persistent_599(config: AppConfig) -> None: async def test_dir_size_fails_after_5_consecutive_599(config: AppConfig) -> None:
"""dir_size times out after 60 s when DSM returns only 599s for every poll. """dir_size gives up and returns Error: after 5 consecutive 599 responses."""
The immediate poll + burst both return 599; Phase 2 keeps polling (large
directories eventually surface) until the 60 s timeout fires.
"""
client = MagicMock() client = MagicMock()
async def _request(api, method, version=None, params=None, **kwargs): async def _request(api, method, version=None, params=None, **kwargs):
@@ -1671,7 +1651,6 @@ async def test_dir_size_times_out_on_persistent_599(config: AppConfig) -> None:
result = await tools["dir_size"](path="/dead") result = await tools["dir_size"](path="/dead")
assert result.startswith("Error:") assert result.startswith("Error:")
assert "timed out" in result.lower() or "60 seconds" in result
# ────────────────────────────────────────────────────────────────────────── # ──────────────────────────────────────────────────────────────────────────