fix: _poll_oneshot for DirSize/MD5 with burst-retry on early 599
Add FileStationClient.start_and_poll_immediately: starts the async task and immediately makes the first status poll within the same method, with no intermediate awaits other than the two HTTP calls. This minimises scheduler latency between start and first poll for one-shot tasks. _poll_oneshot now accepts the first_status from start_and_poll_immediately: - finished=True on first poll → return immediately - finished=False → Phase 2 (exponential backoff, 60 s timeout) - None (first poll was 599) → burst-retry 10× at 10 ms, then Phase 2 (Phase 2 keeps polling through 599 until seen_alive, then fails fast) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "mcp-synology-filestation"
|
||||
version = "0.2.6"
|
||||
version = "0.2.7"
|
||||
description = "MCP server for Synology FileStation"
|
||||
requires-python = ">=3.12"
|
||||
dependencies = [
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""MCP server for Synology FileStation."""
|
||||
|
||||
__version__ = "0.2.6"
|
||||
__version__ = "0.2.7"
|
||||
|
||||
@@ -307,6 +307,52 @@ class FileStationClient:
|
||||
|
||||
raise SynologyError(_error_message(code, api), code=code)
|
||||
|
||||
async def start_and_poll_immediately(
|
||||
self,
|
||||
api: str,
|
||||
start_params: dict[str, Any],
|
||||
poll_version: int,
|
||||
*,
|
||||
start_version: int | None = None,
|
||||
) -> tuple[str, dict[str, Any] | None]:
|
||||
"""Start a DSM async task and immediately make the first status poll.
|
||||
|
||||
Designed for one-shot tasks (DirSize, MD5) where the result window
|
||||
may close quickly. Both the ``start`` and the first ``status`` request
|
||||
are issued inside this single method with no intermediate awaits other
|
||||
than the HTTP calls themselves, minimising scheduler latency.
|
||||
|
||||
Args:
|
||||
api: DSM API name (e.g. "SYNO.FileStation.DirSize").
|
||||
start_params: Query parameters for the ``start`` call.
|
||||
poll_version: API version to use for the ``status`` call.
|
||||
start_version: API version for the ``start`` call (defaults to
|
||||
``maxVersion`` from the API info cache).
|
||||
|
||||
Returns:
|
||||
``(taskid, status_data)`` where ``status_data`` is ``None`` if
|
||||
the first status poll returned 599 (task not yet visible).
|
||||
|
||||
Raises:
|
||||
SynologyError: If the ``start`` call fails, the response contains
|
||||
no task ID, or the ``status`` call fails with a non-599 error.
|
||||
"""
|
||||
start_data = await self.request(api, "start", version=start_version, params=start_params)
|
||||
taskid: str = start_data.get("taskid", "")
|
||||
if not taskid:
|
||||
raise SynologyError("DSM did not return a task ID.", code=0)
|
||||
|
||||
try:
|
||||
status_data = await self.request(
|
||||
api, "status", version=poll_version, params={"taskid": taskid}
|
||||
)
|
||||
except SynologyError as e:
|
||||
if e.code == 599:
|
||||
return taskid, None
|
||||
raise
|
||||
|
||||
return taskid, status_data
|
||||
|
||||
async def download_bytes(self, path: str) -> tuple[str, bytes]:
|
||||
"""Download a file from the NAS via SYNO.FileStation.Download.
|
||||
|
||||
|
||||
@@ -123,58 +123,53 @@ def register_filestation(
|
||||
api: str,
|
||||
version: int,
|
||||
taskid: str,
|
||||
first_status: dict[str, Any] | None,
|
||||
) -> tuple[bool, dict[str, Any] | str]:
|
||||
"""Poll a one-shot DSM task (DirSize, MD5).
|
||||
"""Continue polling a one-shot DSM task after the first status poll.
|
||||
|
||||
One-shot tasks deliver ``finished=True`` exactly once; after that,
|
||||
status polls return 599. Two phases:
|
||||
Called after ``client.start_and_poll_immediately`` has already made
|
||||
the first status request. Handles three outcomes for ``first_status``:
|
||||
|
||||
Phase 1 — burst: polls immediately, then up to 10 times at 50 ms
|
||||
intervals. This catches tasks that complete in under ~500 ms.
|
||||
If all burst polls return 599, the result window was missed.
|
||||
|
||||
Phase 2 — normal: entered only after receiving ``finished=False``
|
||||
(task confirmed running). Exponential backoff up to 60 s. A 599 in
|
||||
this phase means the window closed before we polled — fail fast.
|
||||
* ``finished=True`` — return immediately (task done on first poll).
|
||||
* ``finished=False`` — task confirmed running; enter Phase 2
|
||||
(exponential backoff until ``finished=True`` or 60 s timeout).
|
||||
* ``None`` (first poll returned 599) — burst-retry 10× at 10 ms,
|
||||
then enter Phase 2 regardless (large directories will eventually
|
||||
return ``finished=False``; a 599 after the task was seen alive
|
||||
means the window closed — fail fast with a retry message).
|
||||
|
||||
Returns:
|
||||
``(True, status_dict)`` on success, or ``(False, "Error: …")``
|
||||
on DSM error, missed window, or timeout.
|
||||
on DSM error or timeout.
|
||||
"""
|
||||
from mcp_synology_filestation.client import SynologyError as _SynologyError
|
||||
|
||||
burst_count = 10
|
||||
burst_interval = 0.05 # 50 ms between burst retries
|
||||
seen_alive = False
|
||||
|
||||
# ── Phase 1: burst ────────────────────────────────────────────────
|
||||
for attempt in range(burst_count + 1):
|
||||
if attempt > 0:
|
||||
await asyncio.sleep(burst_interval)
|
||||
try:
|
||||
status_data = await client.request(
|
||||
api,
|
||||
"status",
|
||||
version=version,
|
||||
params={"taskid": taskid},
|
||||
)
|
||||
except _SynologyError as e:
|
||||
if e.code != 599:
|
||||
return False, f"Error: {e}"
|
||||
continue # 599 — task not visible yet, keep bursting
|
||||
if status_data.get("finished"):
|
||||
return True, status_data
|
||||
break # finished=False — task confirmed running, enter Phase 2
|
||||
if first_status is not None:
|
||||
if first_status.get("finished"):
|
||||
return True, first_status
|
||||
seen_alive = True # finished=False: task is running
|
||||
else:
|
||||
# All burst polls returned 599 — one-shot window was missed
|
||||
return (
|
||||
False,
|
||||
"Error: Could not read task result — the operation finished"
|
||||
" before the first successful poll. Please retry.",
|
||||
)
|
||||
# 599 on the immediate poll: burst-retry (10×, 10 ms apart)
|
||||
for _ in range(10):
|
||||
await asyncio.sleep(0.01)
|
||||
try:
|
||||
s = await client.request(
|
||||
api, "status", version=version, params={"taskid": taskid}
|
||||
)
|
||||
except _SynologyError as e:
|
||||
if e.code == 599:
|
||||
continue
|
||||
return False, f"Error: {e}"
|
||||
if s.get("finished"):
|
||||
return True, s
|
||||
seen_alive = True
|
||||
break # finished=False: enter Phase 2
|
||||
|
||||
# ── Phase 2: exponential backoff ──────────────────────────────────
|
||||
# ── Phase 2: exponential backoff until finished or 60 s timeout ──
|
||||
delay = 0.2
|
||||
elapsed = burst_count * burst_interval # time already spent in burst
|
||||
elapsed = 0.0
|
||||
timeout = 60.0
|
||||
|
||||
while True:
|
||||
@@ -183,23 +178,23 @@ def register_filestation(
|
||||
delay = min(delay * 2, 2.0)
|
||||
|
||||
try:
|
||||
status_data = await client.request(
|
||||
api,
|
||||
"status",
|
||||
version=version,
|
||||
params={"taskid": taskid},
|
||||
)
|
||||
s = await client.request(api, "status", version=version, params={"taskid": taskid})
|
||||
except _SynologyError as e:
|
||||
if e.code == 599:
|
||||
return (
|
||||
False,
|
||||
"Error: Could not read task result — the operation finished"
|
||||
" before the result was polled. Please retry.",
|
||||
)
|
||||
return False, f"Error: {e}"
|
||||
|
||||
if status_data.get("finished"):
|
||||
return True, status_data
|
||||
if seen_alive:
|
||||
# Task was running but the one-shot window closed before we read it
|
||||
return (
|
||||
False,
|
||||
"Error: Could not read task result — the operation finished"
|
||||
" before the result was polled. Please retry.",
|
||||
)
|
||||
# Not yet seen alive: large dir still initialising, keep polling
|
||||
else:
|
||||
return False, f"Error: {e}"
|
||||
else:
|
||||
seen_alive = True
|
||||
if s.get("finished"):
|
||||
return True, s
|
||||
|
||||
if elapsed >= timeout:
|
||||
return (
|
||||
@@ -895,20 +890,16 @@ def register_filestation(
|
||||
return "Error: no path provided."
|
||||
|
||||
try:
|
||||
start_data = await client.request(
|
||||
taskid, first_status = await client.start_and_poll_immediately(
|
||||
"SYNO.FileStation.DirSize",
|
||||
"start",
|
||||
version=2,
|
||||
params={"path": json.dumps(paths)},
|
||||
start_params={"path": json.dumps(paths)},
|
||||
poll_version=1,
|
||||
start_version=2,
|
||||
)
|
||||
except SynologyError as e:
|
||||
return f"Error: {e}"
|
||||
|
||||
taskid: str = start_data.get("taskid", "")
|
||||
if not taskid:
|
||||
return "Error: DSM did not return a task ID."
|
||||
|
||||
ok, result = await _poll_oneshot("SYNO.FileStation.DirSize", 1, taskid)
|
||||
ok, result = await _poll_oneshot("SYNO.FileStation.DirSize", 1, taskid, first_status)
|
||||
if not ok:
|
||||
return result # type: ignore[return-value]
|
||||
|
||||
@@ -956,20 +947,16 @@ def register_filestation(
|
||||
from mcp_synology_filestation.client import SynologyError
|
||||
|
||||
try:
|
||||
start_data = await client.request(
|
||||
taskid, first_status = await client.start_and_poll_immediately(
|
||||
"SYNO.FileStation.MD5",
|
||||
"start",
|
||||
version=2,
|
||||
params={"file_path": json.dumps(path)},
|
||||
start_params={"file_path": json.dumps(path)},
|
||||
poll_version=1,
|
||||
start_version=2,
|
||||
)
|
||||
except SynologyError as e:
|
||||
return f"Error: {e}"
|
||||
|
||||
taskid: str = start_data.get("taskid", "")
|
||||
if not taskid:
|
||||
return "Error: DSM did not return a task ID."
|
||||
|
||||
ok, result = await _poll_oneshot("SYNO.FileStation.MD5", 1, taskid)
|
||||
ok, result = await _poll_oneshot("SYNO.FileStation.MD5", 1, taskid, first_status)
|
||||
if not ok:
|
||||
return result # type: ignore[return-value]
|
||||
|
||||
|
||||
@@ -22,8 +22,24 @@ def config() -> AppConfig:
|
||||
|
||||
def _make_mcp_and_tools(config: AppConfig, client: MagicMock) -> dict:
|
||||
"""Register FileStation tools on a mock FastMCP and collect them by name."""
|
||||
from mcp_synology_filestation.client import FileStationClient
|
||||
from mcp_synology_filestation.tools.filestation import register_filestation
|
||||
|
||||
# Bind the real start_and_poll_immediately so it delegates into the
|
||||
# already-mocked client.request — no separate mock needed per test.
|
||||
async def _start_and_poll_immediately(
|
||||
api: str,
|
||||
start_params: dict,
|
||||
poll_version: int,
|
||||
*,
|
||||
start_version: int | None = None,
|
||||
):
|
||||
return await FileStationClient.start_and_poll_immediately(
|
||||
client, api, start_params, poll_version, start_version=start_version
|
||||
)
|
||||
|
||||
client.start_and_poll_immediately = _start_and_poll_immediately
|
||||
|
||||
registered: dict[str, object] = {}
|
||||
|
||||
mcp = MagicMock()
|
||||
@@ -1635,12 +1651,11 @@ async def test_dir_size_retries_on_transient_599(config: AppConfig) -> None:
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_dir_size_window_timeout_on_persistent_599(config: AppConfig) -> None:
|
||||
"""dir_size fails fast (window_timeout=3 s) when DSM returns only 599s.
|
||||
async def test_dir_size_times_out_on_persistent_599(config: AppConfig) -> None:
|
||||
"""dir_size times out after 60 s when DSM returns only 599s for every poll.
|
||||
|
||||
Once the window_timeout elapses without ever seeing the task running
|
||||
(finished=False), _poll_task returns the "result window missed" error
|
||||
rather than waiting the full 60 s.
|
||||
The immediate poll + burst both return 599; Phase 2 keeps polling (large
|
||||
directories eventually surface) until the 60 s timeout fires.
|
||||
"""
|
||||
client = MagicMock()
|
||||
|
||||
@@ -1656,7 +1671,7 @@ async def test_dir_size_window_timeout_on_persistent_599(config: AppConfig) -> N
|
||||
result = await tools["dir_size"](path="/dead")
|
||||
|
||||
assert result.startswith("Error:")
|
||||
assert "retry" in result.lower() or "window" in result.lower() or "poll" in result.lower()
|
||||
assert "timed out" in result.lower() or "60 seconds" in result
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
Reference in New Issue
Block a user