fix: _poll_oneshot for DirSize/MD5 with burst-retry on early 599

Replace _poll_task for one-shot tasks with _poll_oneshot, which uses two
phases: (1) a burst of up to 11 immediate polls at 50ms intervals to catch
tasks that complete in <500ms, and (2) exponential-backoff polling once
finished=False is observed. A 599 during the burst is retried; only if every
burst poll returns 599 is the window considered missed (fail fast).
A 599 during Phase 2 (task was seen running) → window missed as well. _poll_task is
simplified back to a plain long-poll with no window_timeout logic.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-14 13:38:42 +02:00
parent 6510493930
commit 0e8ffaa6df
3 changed files with 97 additions and 33 deletions
+1 -1
View File
@@ -1,6 +1,6 @@
[project] [project]
name = "mcp-synology-filestation" name = "mcp-synology-filestation"
version = "0.2.5" version = "0.2.6"
description = "MCP server for Synology FileStation" description = "MCP server for Synology FileStation"
requires-python = ">=3.12" requires-python = ">=3.12"
dependencies = [ dependencies = [
+1 -1
View File
@@ -1,3 +1,3 @@
"""MCP server for Synology FileStation.""" """MCP server for Synology FileStation."""
__version__ = "0.2.5" __version__ = "0.2.6"
@@ -66,22 +66,18 @@ def register_filestation(
version: int, version: int,
taskid: str, taskid: str,
initial_delay: float = 0.2, initial_delay: float = 0.2,
window_timeout: float | None = None,
) -> tuple[bool, dict[str, Any] | str]: ) -> tuple[bool, dict[str, Any] | str]:
"""Poll a DSM async task until finished or timeout. """Poll a DSM async task until finished or timeout.
For tasks that return intermediate ``finished=False`` status while
running (CopyMove, Delete, Compress, Extract, Search). Use
``_poll_oneshot`` for DirSize and MD5.
Args: Args:
api: DSM API name (e.g. "SYNO.FileStation.CopyMove"). api: DSM API name (e.g. "SYNO.FileStation.CopyMove").
version: API version to use for the status call. version: API version to use for the status call.
taskid: Task ID returned by the corresponding start method. taskid: Task ID returned by the corresponding start method.
initial_delay: Seconds to wait before the first status poll. initial_delay: Seconds to wait before the first status poll.
Set to 0.0 for tasks that may finish before the first poll
interval (e.g. DirSize on small directories, MD5 on small files).
window_timeout: For one-shot tasks (DirSize, MD5) whose result is
available exactly once: if we receive nothing but 599 errors for
this many seconds without ever seeing the task running
(``finished=False``), the result window was missed — return an
error immediately instead of waiting for the full 60 s timeout.
Returns: Returns:
``(True, status_dict)`` on success, or ``(False, "Error: …")`` on ``(True, status_dict)`` on success, or ``(False, "Error: …")`` on
@@ -92,7 +88,6 @@ def register_filestation(
delay = 0.2 delay = 0.2
elapsed = initial_delay elapsed = initial_delay
timeout = 60.0 timeout = 60.0
seen_task_alive = False # True once we receive any non-599 status response
if initial_delay > 0: if initial_delay > 0:
await asyncio.sleep(initial_delay) await asyncio.sleep(initial_delay)
@@ -107,25 +102,10 @@ def register_filestation(
) )
except _SynologyError as e: except _SynologyError as e:
if e.code == 599: if e.code == 599:
# DSM 599 = task not found. For one-shot tasks (DirSize, MD5) pass # task not yet visible — keep polling
# this means either the task hasn't started yet or the result
# window has already closed. If we've never seen the task
# running and window_timeout has elapsed, the window is gone —
# fail fast so the caller can retry rather than wait 60 s.
if (
window_timeout is not None
and not seen_task_alive
and elapsed >= window_timeout
):
return (
False,
"Error: Could not read task result — the operation finished"
" before the first successful poll. Please retry.",
)
else: else:
return False, f"Error: {e}" return False, f"Error: {e}"
else: else:
seen_task_alive = True
if status_data.get("finished"): if status_data.get("finished"):
return True, status_data return True, status_data
@@ -139,6 +119,94 @@ def register_filestation(
elapsed += delay elapsed += delay
delay = min(delay * 2, 2.0) delay = min(delay * 2, 2.0)
async def _poll_oneshot(
    api: str,
    version: int,
    taskid: str,
) -> tuple[bool, dict[str, Any] | str]:
    """Poll a one-shot DSM task (DirSize, MD5).

    One-shot tasks deliver ``finished=True`` exactly once; after that,
    status polls return 599. Two phases:

    Phase 1 — burst: polls immediately, then up to 10 more times at 50 ms
    intervals. This catches tasks that complete in under ~500 ms. A 599
    in this phase is retried; if *all* burst polls return 599, the result
    window is treated as missed.

    Phase 2 — normal: entered only after receiving ``finished=False``
    (task confirmed running). Exponential backoff (0.2 s doubling up to
    2 s) for up to 60 s. A 599 in this phase means the window closed
    before we polled — fail fast.

    The 60 s limit is enforced against both the accumulated sleep time and
    the event loop's monotonic clock, so slow status round-trips count
    towards the timeout as well.

    Args:
        api: DSM API name (e.g. "SYNO.FileStation.DirSize").
        version: API version to use for the status call.
        taskid: Task ID returned by the corresponding start method.

    Returns:
        ``(True, status_dict)`` on success, or ``(False, "Error: …")``
        on DSM error, missed window, or timeout.
    """
    from mcp_synology_filestation.client import SynologyError as _SynologyError

    burst_count = 10
    burst_interval = 0.05  # 50 ms between burst retries
    timeout = 60.0

    loop = asyncio.get_running_loop()
    started = loop.time()
    slept = 0.0  # sum of the delays we asked asyncio.sleep() for

    async def _status() -> dict[str, Any]:
        # Single place that issues the DSM "status" request for this task.
        return await client.request(
            api,
            "status",
            version=version,
            params={"taskid": taskid},
        )

    # ── Phase 1: burst ────────────────────────────────────────────────
    for attempt in range(burst_count + 1):
        if attempt > 0:
            await asyncio.sleep(burst_interval)
            slept += burst_interval
        try:
            status_data = await _status()
        except _SynologyError as e:
            if e.code != 599:
                return False, f"Error: {e}"
            continue  # 599 — task not visible yet, keep bursting
        if status_data.get("finished"):
            return True, status_data
        break  # finished=False — task confirmed running, enter Phase 2
    else:
        # All burst polls returned 599 — one-shot window was missed
        return (
            False,
            "Error: Could not read task result — the operation finished"
            " before the first successful poll. Please retry.",
        )

    # ── Phase 2: exponential backoff ──────────────────────────────────
    delay = 0.2
    while True:
        await asyncio.sleep(delay)
        slept += delay
        delay = min(delay * 2, 2.0)
        try:
            status_data = await _status()
        except _SynologyError as e:
            if e.code == 599:
                return (
                    False,
                    "Error: Could not read task result — the operation finished"
                    " before the result was polled. Please retry.",
                )
            return False, f"Error: {e}"
        if status_data.get("finished"):
            return True, status_data
        # Use whichever is larger: requested sleep time or real elapsed
        # time. The wall clock also covers request latency; the sleep sum
        # keeps the timeout reachable when sleeps return instantly.
        if max(slept, loop.time() - started) >= timeout:
            return (
                False,
                "Error: Operation timed out after 60 seconds — check NAS manually.",
            )
@mcp.tool() @mcp.tool()
async def list_shares(): async def list_shares():
"""List all shared folders. Returns name/path/volume-usage table.""" """List all shared folders. Returns name/path/volume-usage table."""
@@ -840,9 +908,7 @@ def register_filestation(
if not taskid: if not taskid:
return "Error: DSM did not return a task ID." return "Error: DSM did not return a task ID."
ok, result = await _poll_task( ok, result = await _poll_oneshot("SYNO.FileStation.DirSize", 1, taskid)
"SYNO.FileStation.DirSize", 1, taskid, initial_delay=0.0, window_timeout=3.0
)
if not ok: if not ok:
return result # type: ignore[return-value] return result # type: ignore[return-value]
@@ -903,9 +969,7 @@ def register_filestation(
if not taskid: if not taskid:
return "Error: DSM did not return a task ID." return "Error: DSM did not return a task ID."
ok, result = await _poll_task( ok, result = await _poll_oneshot("SYNO.FileStation.MD5", 1, taskid)
"SYNO.FileStation.MD5", 1, taskid, initial_delay=0.0, window_timeout=3.0
)
if not ok: if not ok:
return result # type: ignore[return-value] return result # type: ignore[return-value]