From 451ee7116fe3827b54a352cfd36d9d4e1fda1470 Mon Sep 17 00:00:00 2001 From: Marcus van Elst Date: Tue, 14 Apr 2026 14:49:32 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20cold-start=20599=20on=20DirSize/MD5=20?= =?UTF-8?q?=E2=80=94=20restart=20task=20instead=20of=20giving=20up?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DSM's DirSize and MD5 background service needs ~6-8 s to initialise after a period of inactivity. During this cold-start window tasks are registered but every status poll returns error 599 ("no such task"). Replace the ad-hoc start+_poll_task call in dir_size and get_md5 with a new _start_and_poll_oneshot helper that: - polls with exponential backoff (0.2 s → cap 2 s) - on 5 consecutive 599s: restarts the task (up to 6 attempts total) - honours a shared 60 s wall-clock budget across all restarts - returns a clear error if all restart attempts are exhausted Root cause confirmed by test_dirsize_md5.py: after ~6 s / 2 restarts the service warms up and the very first poll on the new task succeeds. Co-Authored-By: Claude Sonnet 4.6 --- pyproject.toml | 2 +- src/mcp_synology_filestation/__init__.py | 2 +- .../tools/filestation.py | 129 +++++++++++++----- tests/test_tools_filestation.py | 30 +++- 4 files changed, 125 insertions(+), 38 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 55aeb2f..ed11b50 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-synology-filestation" -version = "0.2.8" +version = "0.2.9" description = "MCP server for Synology FileStation" requires-python = ">=3.12" dependencies = [ diff --git a/src/mcp_synology_filestation/__init__.py b/src/mcp_synology_filestation/__init__.py index 25b6fe8..d380823 100644 --- a/src/mcp_synology_filestation/__init__.py +++ b/src/mcp_synology_filestation/__init__.py @@ -1,3 +1,3 @@ """MCP server for Synology FileStation.""" -__version__ = "0.2.8" +__version__ = "0.2.9" diff --git a/src/mcp_synology_filestation/tools/filestation.py b/src/mcp_synology_filestation/tools/filestation.py index 69c8fa6..af9859e 100644 --- a/src/mcp_synology_filestation/tools/filestation.py +++ b/src/mcp_synology_filestation/tools/filestation.py @@ -59,7 +59,88 @@ def register_filestation( client: FileStationClient for DSM API calls. """ - # ── internal polling helper ─────────────────────────────────────────── + # ── internal polling helpers ────────────────────────────────────────── + + async def _start_and_poll_oneshot( + api: str, + start_params: dict[str, Any], + start_version: int, + poll_version: int, + ) -> tuple[bool, dict[str, Any] | str]: + """Start a one-shot DSM task and poll until finished, restarting on cold-start 599s. + + DirSize and MD5 are "one-shot" tasks: DSM delivers ``finished=True`` exactly + once, then discards the result. Additionally, the DSM background service for + these tasks occasionally needs a few seconds to initialise after a period of + inactivity ("cold start"). During cold start the service registers task IDs + but returns error 599 on every status poll. The correct recovery is to restart + the task once the service has had time to wake up. + + Args: + api: DSM API name (e.g. "SYNO.FileStation.DirSize"). + start_params: Parameters forwarded to the ``start`` method. + start_version: API version for the ``start`` call. + poll_version: API version for the ``status`` call. + + Returns: + ``(True, status_dict)`` on success, or ``(False, "Error: …")`` on + DSM error or timeout. + """ + from mcp_synology_filestation.client import SynologyError as _SynologyError + + max_restarts = 6 + timeout = 60.0 + total_elapsed = 0.0 + + for _attempt in range(max_restarts): + try: + start_data = await client.request( + api, "start", version=start_version, params=start_params + ) + except _SynologyError as e: + return False, f"Error: {e}" + + taskid: str = start_data.get("taskid", "") + if not taskid: + return False, "Error: DSM did not return a task ID." + + # Poll with exponential backoff; restart on 5 consecutive 599s + delay = 0.2 + consecutive_599 = 0 + + while True: + try: + status_data = await client.request( + api, "status", version=poll_version, params={"taskid": taskid} + ) + consecutive_599 = 0 + if status_data.get("finished"): + return True, status_data + # Still running — keep polling + except _SynologyError as e: + if e.code != 599: + return False, f"Error: {e}" + consecutive_599 += 1 + if consecutive_599 >= 5: + # 5× 599 in a row: either cold start or missed result window. + # Restart the task so DSM can re-queue it. + break + + if total_elapsed >= timeout: + return ( + False, + "Error: Operation timed out after 60 seconds — check NAS manually.", + ) + + await asyncio.sleep(delay) + total_elapsed += delay + delay = min(delay * 2, 2.0) + + return ( + False, + "Error: DSM did not return results after multiple retries" + " (service may be starting up — try again in a moment).", + ) async def _poll_task( api: str, @@ -804,27 +885,16 @@ def register_filestation( async def dir_size(path: str): """Get total size, file count and folder count for one or more directories. path: comma-separated share-relative paths.""" - from mcp_synology_filestation.client import SynologyError - paths = [p.strip() for p in path.split(",") if p.strip()] if not paths: return "Error: no path provided." - try: - start_data = await client.request( - "SYNO.FileStation.DirSize", - "start", - version=2, - params={"path": json.dumps(paths)}, - ) - except SynologyError as e: - return f"Error: {e}" - - taskid: str = start_data.get("taskid", "") - if not taskid: - return "Error: DSM did not return a task ID." - - ok, result = await _poll_task("SYNO.FileStation.DirSize", 1, taskid, initial_delay=0.0) + ok, result = await _start_and_poll_oneshot( + "SYNO.FileStation.DirSize", + start_params={"path": json.dumps(paths)}, + start_version=2, + poll_version=1, + ) if not ok: return result # type: ignore[return-value] @@ -869,23 +939,12 @@ def register_filestation( @mcp.tool() async def get_md5(path: str): """Compute the MD5 checksum of a file on the NAS. path: share-relative file path.""" - from mcp_synology_filestation.client import SynologyError - - try: - start_data = await client.request( - "SYNO.FileStation.MD5", - "start", - version=2, - params={"file_path": json.dumps(path)}, - ) - except SynologyError as e: - return f"Error: {e}" - - taskid: str = start_data.get("taskid", "") - if not taskid: - return "Error: DSM did not return a task ID." - - ok, result = await _poll_task("SYNO.FileStation.MD5", 1, taskid, initial_delay=0.0) + ok, result = await _start_and_poll_oneshot( + "SYNO.FileStation.MD5", + start_params={"file_path": json.dumps(path)}, + start_version=2, + poll_version=1, + ) if not ok: return result # type: ignore[return-value] diff --git a/tests/test_tools_filestation.py b/tests/test_tools_filestation.py index affe958..b18769a 100644 --- a/tests/test_tools_filestation.py +++ b/tests/test_tools_filestation.py @@ -1636,7 +1636,7 @@ async def test_dir_size_retries_on_transient_599(config: AppConfig) -> None: @pytest.mark.asyncio async def test_dir_size_fails_after_5_consecutive_599(config: AppConfig) -> None: - """dir_size gives up and returns Error: after 5 consecutive 599 responses.""" + """dir_size gives up and returns Error: after exhausting all restart attempts.""" client = MagicMock() async def _request(api, method, version=None, params=None, **kwargs): @@ -1653,6 +1653,34 @@ async def test_dir_size_fails_after_5_consecutive_599(config: AppConfig) -> None assert result.startswith("Error:") +@pytest.mark.asyncio +async def test_dir_size_cold_start_restart(config: AppConfig) -> None: + """dir_size restarts the task after 5 consecutive 599s and succeeds on second attempt.""" + client = MagicMock() + start_count = {"n": 0} + status_count = {"n": 0} + + async def _request(api, method, version=None, params=None, **kwargs): + if method == "start": + start_count["n"] += 1 + return {"taskid": f"task_{start_count['n']}"} + status_count["n"] += 1 + # First 5 status calls → 599 (simulates cold start) + if status_count["n"] <= 5: + raise SynologyError("DSM error code 599", code=599) + # After restart: immediately done + return {"finished": True, "num_dir": 1, "num_file": 5, "total_size": 1024} + + client.request = AsyncMock(side_effect=_request) + tools = _make_mcp_and_tools(config, client) + + with patch("asyncio.sleep", new_callable=AsyncMock): + result = await tools["dir_size"](path="/coldstart") + + assert "Total Size" in result + assert start_count["n"] == 2 # task was restarted once after cold-start 599s + + # ────────────────────────────────────────────────────────────────────────── # get_md5 # ──────────────────────────────────────────────────────────────────────────