fix: cold-start 599 on DirSize/MD5 — restart task instead of giving up
DSM's DirSize and MD5 background service needs ~6-8 s to initialise
after a period of inactivity. During this cold-start window tasks are
registered but every status poll returns error 599 ("no such task").
Replace the ad-hoc start+_poll_task call in dir_size and get_md5 with
a new _start_and_poll_oneshot helper that:
- polls with exponential backoff (0.2 s → cap 2 s)
- on 5 consecutive 599s: restarts the task (up to 6 attempts total)
- honours a shared 60 s wall-clock budget across all restarts
- returns a clear error if all restart attempts are exhausted
Root cause confirmed by test_dirsize_md5.py: after ~6 s / 2 restarts
the service warms up and the very first poll on the new task succeeds.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "mcp-synology-filestation"
|
||||
version = "0.2.8"
|
||||
version = "0.2.9"
|
||||
description = "MCP server for Synology FileStation"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""MCP server for Synology FileStation."""
|
||||
|
||||
__version__ = "0.2.8"
|
||||
__version__ = "0.2.9"
|
||||
|
||||
@@ -59,7 +59,88 @@ def register_filestation(
|
||||
client: FileStationClient for DSM API calls.
|
||||
"""
|
||||
|
||||
# ── internal polling helper ───────────────────────────────────────────
|
||||
# ── internal polling helpers ──────────────────────────────────────────
|
||||
|
||||
async def _start_and_poll_oneshot(
|
||||
api: str,
|
||||
start_params: dict[str, Any],
|
||||
start_version: int,
|
||||
poll_version: int,
|
||||
) -> tuple[bool, dict[str, Any] | str]:
|
||||
"""Start a one-shot DSM task and poll until finished, restarting on cold-start 599s.
|
||||
|
||||
DirSize and MD5 are "one-shot" tasks: DSM delivers ``finished=True`` exactly
|
||||
once, then discards the result. Additionally, the DSM background service for
|
||||
these tasks occasionally needs a few seconds to initialise after a period of
|
||||
inactivity ("cold start"). During cold start the service registers task IDs
|
||||
but returns error 599 on every status poll. The correct recovery is to restart
|
||||
the task once the service has had time to wake up.
|
||||
|
||||
Args:
|
||||
api: DSM API name (e.g. "SYNO.FileStation.DirSize").
|
||||
start_params: Parameters forwarded to the ``start`` method.
|
||||
start_version: API version for the ``start`` call.
|
||||
poll_version: API version for the ``status`` call.
|
||||
|
||||
Returns:
|
||||
``(True, status_dict)`` on success, or ``(False, "Error: …")`` on
|
||||
DSM error or timeout.
|
||||
"""
|
||||
from mcp_synology_filestation.client import SynologyError as _SynologyError
|
||||
|
||||
max_restarts = 6
|
||||
timeout = 60.0
|
||||
total_elapsed = 0.0
|
||||
|
||||
for _attempt in range(max_restarts):
|
||||
try:
|
||||
start_data = await client.request(
|
||||
api, "start", version=start_version, params=start_params
|
||||
)
|
||||
except _SynologyError as e:
|
||||
return False, f"Error: {e}"
|
||||
|
||||
taskid: str = start_data.get("taskid", "")
|
||||
if not taskid:
|
||||
return False, "Error: DSM did not return a task ID."
|
||||
|
||||
# Poll with exponential backoff; restart on 5 consecutive 599s
|
||||
delay = 0.2
|
||||
consecutive_599 = 0
|
||||
|
||||
while True:
|
||||
try:
|
||||
status_data = await client.request(
|
||||
api, "status", version=poll_version, params={"taskid": taskid}
|
||||
)
|
||||
consecutive_599 = 0
|
||||
if status_data.get("finished"):
|
||||
return True, status_data
|
||||
# Still running — keep polling
|
||||
except _SynologyError as e:
|
||||
if e.code != 599:
|
||||
return False, f"Error: {e}"
|
||||
consecutive_599 += 1
|
||||
if consecutive_599 >= 5:
|
||||
# 5× 599 in a row: either cold start or missed result window.
|
||||
# Restart the task so DSM can re-queue it.
|
||||
break
|
||||
|
||||
if total_elapsed >= timeout:
|
||||
return (
|
||||
False,
|
||||
"Error: Operation timed out after 60 seconds — check NAS manually.",
|
||||
)
|
||||
|
||||
await asyncio.sleep(delay)
|
||||
total_elapsed += delay
|
||||
delay = min(delay * 2, 2.0)
|
||||
|
||||
return (
|
||||
False,
|
||||
"Error: DSM did not return results after multiple retries"
|
||||
" (service may be starting up — try again in a moment).",
|
||||
)
|
||||
|
||||
async def _poll_task(
|
||||
api: str,
|
||||
@@ -804,27 +885,16 @@ def register_filestation(
|
||||
async def dir_size(path: str):
|
||||
"""Get total size, file count and folder count for one or more directories.
|
||||
path: comma-separated share-relative paths."""
|
||||
from mcp_synology_filestation.client import SynologyError
|
||||
|
||||
paths = [p.strip() for p in path.split(",") if p.strip()]
|
||||
if not paths:
|
||||
return "Error: no path provided."
|
||||
|
||||
try:
|
||||
start_data = await client.request(
|
||||
ok, result = await _start_and_poll_oneshot(
|
||||
"SYNO.FileStation.DirSize",
|
||||
"start",
|
||||
version=2,
|
||||
params={"path": json.dumps(paths)},
|
||||
start_params={"path": json.dumps(paths)},
|
||||
start_version=2,
|
||||
poll_version=1,
|
||||
)
|
||||
except SynologyError as e:
|
||||
return f"Error: {e}"
|
||||
|
||||
taskid: str = start_data.get("taskid", "")
|
||||
if not taskid:
|
||||
return "Error: DSM did not return a task ID."
|
||||
|
||||
ok, result = await _poll_task("SYNO.FileStation.DirSize", 1, taskid, initial_delay=0.0)
|
||||
if not ok:
|
||||
return result # type: ignore[return-value]
|
||||
|
||||
@@ -869,23 +939,12 @@ def register_filestation(
|
||||
@mcp.tool()
|
||||
async def get_md5(path: str):
|
||||
"""Compute the MD5 checksum of a file on the NAS. path: share-relative file path."""
|
||||
from mcp_synology_filestation.client import SynologyError
|
||||
|
||||
try:
|
||||
start_data = await client.request(
|
||||
ok, result = await _start_and_poll_oneshot(
|
||||
"SYNO.FileStation.MD5",
|
||||
"start",
|
||||
version=2,
|
||||
params={"file_path": json.dumps(path)},
|
||||
start_params={"file_path": json.dumps(path)},
|
||||
start_version=2,
|
||||
poll_version=1,
|
||||
)
|
||||
except SynologyError as e:
|
||||
return f"Error: {e}"
|
||||
|
||||
taskid: str = start_data.get("taskid", "")
|
||||
if not taskid:
|
||||
return "Error: DSM did not return a task ID."
|
||||
|
||||
ok, result = await _poll_task("SYNO.FileStation.MD5", 1, taskid, initial_delay=0.0)
|
||||
if not ok:
|
||||
return result # type: ignore[return-value]
|
||||
|
||||
|
||||
@@ -1636,7 +1636,7 @@ async def test_dir_size_retries_on_transient_599(config: AppConfig) -> None:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dir_size_fails_after_5_consecutive_599(config: AppConfig) -> None:
|
||||
"""dir_size gives up and returns Error: after 5 consecutive 599 responses."""
|
||||
"""dir_size gives up and returns Error: after exhausting all restart attempts."""
|
||||
client = MagicMock()
|
||||
|
||||
async def _request(api, method, version=None, params=None, **kwargs):
|
||||
@@ -1653,6 +1653,34 @@ async def test_dir_size_fails_after_5_consecutive_599(config: AppConfig) -> None
|
||||
assert result.startswith("Error:")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dir_size_cold_start_restart(config: AppConfig) -> None:
|
||||
"""dir_size restarts the task after 5 consecutive 599s and succeeds on second attempt."""
|
||||
client = MagicMock()
|
||||
start_count = {"n": 0}
|
||||
status_count = {"n": 0}
|
||||
|
||||
async def _request(api, method, version=None, params=None, **kwargs):
|
||||
if method == "start":
|
||||
start_count["n"] += 1
|
||||
return {"taskid": f"task_{start_count['n']}"}
|
||||
status_count["n"] += 1
|
||||
# First 5 status calls → 599 (simulates cold start)
|
||||
if status_count["n"] <= 5:
|
||||
raise SynologyError("DSM error code 599", code=599)
|
||||
# After restart: immediately done
|
||||
return {"finished": True, "num_dir": 1, "num_file": 5, "total_size": 1024}
|
||||
|
||||
client.request = AsyncMock(side_effect=_request)
|
||||
tools = _make_mcp_and_tools(config, client)
|
||||
|
||||
with patch("asyncio.sleep", new_callable=AsyncMock):
|
||||
result = await tools["dir_size"](path="/coldstart")
|
||||
|
||||
assert "Total Size" in result
|
||||
assert start_count["n"] == 2 # task was restarted once after cold-start 599s
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
# get_md5
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user