fix: _poll_oneshot for DirSize/MD5 with burst-retry on early 599
Add FileStationClient.start_and_poll_immediately: starts the async task and immediately makes the first status poll within the same method, with no intermediate awaits other than the two HTTP calls. This minimises scheduler latency between start and first poll for one-shot tasks. _poll_oneshot now accepts the first_status from start_and_poll_immediately: - finished=True on first poll → return immediately - finished=False → Phase 2 (exponential backoff, 60 s timeout) - None (first poll was 599) → burst-retry 10× at 10 ms, then Phase 2 (Phase 2 keeps polling through 599 until seen_alive, then fails fast) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+1
-1
@@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "mcp-synology-filestation"
|
name = "mcp-synology-filestation"
|
||||||
version = "0.2.6"
|
version = "0.2.7"
|
||||||
description = "MCP server for Synology FileStation"
|
description = "MCP server for Synology FileStation"
|
||||||
requires-python = ">=3.12"
|
requires-python = ">=3.12"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
"""MCP server for Synology FileStation."""
|
"""MCP server for Synology FileStation."""
|
||||||
|
|
||||||
__version__ = "0.2.6"
|
__version__ = "0.2.7"
|
||||||
|
|||||||
@@ -307,6 +307,52 @@ class FileStationClient:
|
|||||||
|
|
||||||
raise SynologyError(_error_message(code, api), code=code)
|
raise SynologyError(_error_message(code, api), code=code)
|
||||||
|
|
||||||
|
async def start_and_poll_immediately(
|
||||||
|
self,
|
||||||
|
api: str,
|
||||||
|
start_params: dict[str, Any],
|
||||||
|
poll_version: int,
|
||||||
|
*,
|
||||||
|
start_version: int | None = None,
|
||||||
|
) -> tuple[str, dict[str, Any] | None]:
|
||||||
|
"""Start a DSM async task and immediately make the first status poll.
|
||||||
|
|
||||||
|
Designed for one-shot tasks (DirSize, MD5) where the result window
|
||||||
|
may close quickly. Both the ``start`` and the first ``status`` request
|
||||||
|
are issued inside this single method with no intermediate awaits other
|
||||||
|
than the HTTP calls themselves, minimising scheduler latency.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
api: DSM API name (e.g. "SYNO.FileStation.DirSize").
|
||||||
|
start_params: Query parameters for the ``start`` call.
|
||||||
|
poll_version: API version to use for the ``status`` call.
|
||||||
|
start_version: API version for the ``start`` call (defaults to
|
||||||
|
``maxVersion`` from the API info cache).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
``(taskid, status_data)`` where ``status_data`` is ``None`` if
|
||||||
|
the first status poll returned 599 (task not yet visible).
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
SynologyError: If the ``start`` call fails, the response contains
|
||||||
|
no task ID, or the ``status`` call fails with a non-599 error.
|
||||||
|
"""
|
||||||
|
start_data = await self.request(api, "start", version=start_version, params=start_params)
|
||||||
|
taskid: str = start_data.get("taskid", "")
|
||||||
|
if not taskid:
|
||||||
|
raise SynologyError("DSM did not return a task ID.", code=0)
|
||||||
|
|
||||||
|
try:
|
||||||
|
status_data = await self.request(
|
||||||
|
api, "status", version=poll_version, params={"taskid": taskid}
|
||||||
|
)
|
||||||
|
except SynologyError as e:
|
||||||
|
if e.code == 599:
|
||||||
|
return taskid, None
|
||||||
|
raise
|
||||||
|
|
||||||
|
return taskid, status_data
|
||||||
|
|
||||||
async def download_bytes(self, path: str) -> tuple[str, bytes]:
|
async def download_bytes(self, path: str) -> tuple[str, bytes]:
|
||||||
"""Download a file from the NAS via SYNO.FileStation.Download.
|
"""Download a file from the NAS via SYNO.FileStation.Download.
|
||||||
|
|
||||||
|
|||||||
@@ -123,58 +123,53 @@ def register_filestation(
|
|||||||
api: str,
|
api: str,
|
||||||
version: int,
|
version: int,
|
||||||
taskid: str,
|
taskid: str,
|
||||||
|
first_status: dict[str, Any] | None,
|
||||||
) -> tuple[bool, dict[str, Any] | str]:
|
) -> tuple[bool, dict[str, Any] | str]:
|
||||||
"""Poll a one-shot DSM task (DirSize, MD5).
|
"""Continue polling a one-shot DSM task after the first status poll.
|
||||||
|
|
||||||
One-shot tasks deliver ``finished=True`` exactly once; after that,
|
Called after ``client.start_and_poll_immediately`` has already made
|
||||||
status polls return 599. Two phases:
|
the first status request. Handles three outcomes for ``first_status``:
|
||||||
|
|
||||||
Phase 1 — burst: polls immediately, then up to 10 times at 50 ms
|
* ``finished=True`` — return immediately (task done on first poll).
|
||||||
intervals. This catches tasks that complete in under ~500 ms.
|
* ``finished=False`` — task confirmed running; enter Phase 2
|
||||||
If all burst polls return 599, the result window was missed.
|
(exponential backoff until ``finished=True`` or 60 s timeout).
|
||||||
|
* ``None`` (first poll returned 599) — burst-retry 10× at 10 ms,
|
||||||
Phase 2 — normal: entered only after receiving ``finished=False``
|
then enter Phase 2 regardless (large directories will eventually
|
||||||
(task confirmed running). Exponential backoff up to 60 s. A 599 in
|
return ``finished=False``; a 599 after the task was seen alive
|
||||||
this phase means the window closed before we polled — fail fast.
|
means the window closed — fail fast with a retry message).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
``(True, status_dict)`` on success, or ``(False, "Error: …")``
|
``(True, status_dict)`` on success, or ``(False, "Error: …")``
|
||||||
on DSM error, missed window, or timeout.
|
on DSM error or timeout.
|
||||||
"""
|
"""
|
||||||
from mcp_synology_filestation.client import SynologyError as _SynologyError
|
from mcp_synology_filestation.client import SynologyError as _SynologyError
|
||||||
|
|
||||||
burst_count = 10
|
seen_alive = False
|
||||||
burst_interval = 0.05 # 50 ms between burst retries
|
|
||||||
|
|
||||||
# ── Phase 1: burst ────────────────────────────────────────────────
|
if first_status is not None:
|
||||||
for attempt in range(burst_count + 1):
|
if first_status.get("finished"):
|
||||||
if attempt > 0:
|
return True, first_status
|
||||||
await asyncio.sleep(burst_interval)
|
seen_alive = True # finished=False: task is running
|
||||||
|
else:
|
||||||
|
# 599 on the immediate poll: burst-retry (10×, 10 ms apart)
|
||||||
|
for _ in range(10):
|
||||||
|
await asyncio.sleep(0.01)
|
||||||
try:
|
try:
|
||||||
status_data = await client.request(
|
s = await client.request(
|
||||||
api,
|
api, "status", version=version, params={"taskid": taskid}
|
||||||
"status",
|
|
||||||
version=version,
|
|
||||||
params={"taskid": taskid},
|
|
||||||
)
|
)
|
||||||
except _SynologyError as e:
|
except _SynologyError as e:
|
||||||
if e.code != 599:
|
if e.code == 599:
|
||||||
|
continue
|
||||||
return False, f"Error: {e}"
|
return False, f"Error: {e}"
|
||||||
continue # 599 — task not visible yet, keep bursting
|
if s.get("finished"):
|
||||||
if status_data.get("finished"):
|
return True, s
|
||||||
return True, status_data
|
seen_alive = True
|
||||||
break # finished=False — task confirmed running, enter Phase 2
|
break # finished=False: enter Phase 2
|
||||||
else:
|
|
||||||
# All burst polls returned 599 — one-shot window was missed
|
|
||||||
return (
|
|
||||||
False,
|
|
||||||
"Error: Could not read task result — the operation finished"
|
|
||||||
" before the first successful poll. Please retry.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# ── Phase 2: exponential backoff ──────────────────────────────────
|
# ── Phase 2: exponential backoff until finished or 60 s timeout ──
|
||||||
delay = 0.2
|
delay = 0.2
|
||||||
elapsed = burst_count * burst_interval # time already spent in burst
|
elapsed = 0.0
|
||||||
timeout = 60.0
|
timeout = 60.0
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
@@ -183,23 +178,23 @@ def register_filestation(
|
|||||||
delay = min(delay * 2, 2.0)
|
delay = min(delay * 2, 2.0)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
status_data = await client.request(
|
s = await client.request(api, "status", version=version, params={"taskid": taskid})
|
||||||
api,
|
|
||||||
"status",
|
|
||||||
version=version,
|
|
||||||
params={"taskid": taskid},
|
|
||||||
)
|
|
||||||
except _SynologyError as e:
|
except _SynologyError as e:
|
||||||
if e.code == 599:
|
if e.code == 599:
|
||||||
|
if seen_alive:
|
||||||
|
# Task was running but the one-shot window closed before we read it
|
||||||
return (
|
return (
|
||||||
False,
|
False,
|
||||||
"Error: Could not read task result — the operation finished"
|
"Error: Could not read task result — the operation finished"
|
||||||
" before the result was polled. Please retry.",
|
" before the result was polled. Please retry.",
|
||||||
)
|
)
|
||||||
|
# Not yet seen alive: large dir still initialising, keep polling
|
||||||
|
else:
|
||||||
return False, f"Error: {e}"
|
return False, f"Error: {e}"
|
||||||
|
else:
|
||||||
if status_data.get("finished"):
|
seen_alive = True
|
||||||
return True, status_data
|
if s.get("finished"):
|
||||||
|
return True, s
|
||||||
|
|
||||||
if elapsed >= timeout:
|
if elapsed >= timeout:
|
||||||
return (
|
return (
|
||||||
@@ -895,20 +890,16 @@ def register_filestation(
|
|||||||
return "Error: no path provided."
|
return "Error: no path provided."
|
||||||
|
|
||||||
try:
|
try:
|
||||||
start_data = await client.request(
|
taskid, first_status = await client.start_and_poll_immediately(
|
||||||
"SYNO.FileStation.DirSize",
|
"SYNO.FileStation.DirSize",
|
||||||
"start",
|
start_params={"path": json.dumps(paths)},
|
||||||
version=2,
|
poll_version=1,
|
||||||
params={"path": json.dumps(paths)},
|
start_version=2,
|
||||||
)
|
)
|
||||||
except SynologyError as e:
|
except SynologyError as e:
|
||||||
return f"Error: {e}"
|
return f"Error: {e}"
|
||||||
|
|
||||||
taskid: str = start_data.get("taskid", "")
|
ok, result = await _poll_oneshot("SYNO.FileStation.DirSize", 1, taskid, first_status)
|
||||||
if not taskid:
|
|
||||||
return "Error: DSM did not return a task ID."
|
|
||||||
|
|
||||||
ok, result = await _poll_oneshot("SYNO.FileStation.DirSize", 1, taskid)
|
|
||||||
if not ok:
|
if not ok:
|
||||||
return result # type: ignore[return-value]
|
return result # type: ignore[return-value]
|
||||||
|
|
||||||
@@ -956,20 +947,16 @@ def register_filestation(
|
|||||||
from mcp_synology_filestation.client import SynologyError
|
from mcp_synology_filestation.client import SynologyError
|
||||||
|
|
||||||
try:
|
try:
|
||||||
start_data = await client.request(
|
taskid, first_status = await client.start_and_poll_immediately(
|
||||||
"SYNO.FileStation.MD5",
|
"SYNO.FileStation.MD5",
|
||||||
"start",
|
start_params={"file_path": json.dumps(path)},
|
||||||
version=2,
|
poll_version=1,
|
||||||
params={"file_path": json.dumps(path)},
|
start_version=2,
|
||||||
)
|
)
|
||||||
except SynologyError as e:
|
except SynologyError as e:
|
||||||
return f"Error: {e}"
|
return f"Error: {e}"
|
||||||
|
|
||||||
taskid: str = start_data.get("taskid", "")
|
ok, result = await _poll_oneshot("SYNO.FileStation.MD5", 1, taskid, first_status)
|
||||||
if not taskid:
|
|
||||||
return "Error: DSM did not return a task ID."
|
|
||||||
|
|
||||||
ok, result = await _poll_oneshot("SYNO.FileStation.MD5", 1, taskid)
|
|
||||||
if not ok:
|
if not ok:
|
||||||
return result # type: ignore[return-value]
|
return result # type: ignore[return-value]
|
||||||
|
|
||||||
|
|||||||
@@ -22,8 +22,24 @@ def config() -> AppConfig:
|
|||||||
|
|
||||||
def _make_mcp_and_tools(config: AppConfig, client: MagicMock) -> dict:
|
def _make_mcp_and_tools(config: AppConfig, client: MagicMock) -> dict:
|
||||||
"""Register FileStation tools on a mock FastMCP and collect them by name."""
|
"""Register FileStation tools on a mock FastMCP and collect them by name."""
|
||||||
|
from mcp_synology_filestation.client import FileStationClient
|
||||||
from mcp_synology_filestation.tools.filestation import register_filestation
|
from mcp_synology_filestation.tools.filestation import register_filestation
|
||||||
|
|
||||||
|
# Bind the real start_and_poll_immediately so it delegates into the
|
||||||
|
# already-mocked client.request — no separate mock needed per test.
|
||||||
|
async def _start_and_poll_immediately(
|
||||||
|
api: str,
|
||||||
|
start_params: dict,
|
||||||
|
poll_version: int,
|
||||||
|
*,
|
||||||
|
start_version: int | None = None,
|
||||||
|
):
|
||||||
|
return await FileStationClient.start_and_poll_immediately(
|
||||||
|
client, api, start_params, poll_version, start_version=start_version
|
||||||
|
)
|
||||||
|
|
||||||
|
client.start_and_poll_immediately = _start_and_poll_immediately
|
||||||
|
|
||||||
registered: dict[str, object] = {}
|
registered: dict[str, object] = {}
|
||||||
|
|
||||||
mcp = MagicMock()
|
mcp = MagicMock()
|
||||||
@@ -1635,12 +1651,11 @@ async def test_dir_size_retries_on_transient_599(config: AppConfig) -> None:
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_dir_size_window_timeout_on_persistent_599(config: AppConfig) -> None:
|
async def test_dir_size_times_out_on_persistent_599(config: AppConfig) -> None:
|
||||||
"""dir_size fails fast (window_timeout=3 s) when DSM returns only 599s.
|
"""dir_size times out after 60 s when DSM returns only 599s for every poll.
|
||||||
|
|
||||||
Once the window_timeout elapses without ever seeing the task running
|
The immediate poll + burst both return 599; Phase 2 keeps polling (large
|
||||||
(finished=False), _poll_task returns the "result window missed" error
|
directories eventually surface) until the 60 s timeout fires.
|
||||||
rather than waiting the full 60 s.
|
|
||||||
"""
|
"""
|
||||||
client = MagicMock()
|
client = MagicMock()
|
||||||
|
|
||||||
@@ -1656,7 +1671,7 @@ async def test_dir_size_window_timeout_on_persistent_599(config: AppConfig) -> N
|
|||||||
result = await tools["dir_size"](path="/dead")
|
result = await tools["dir_size"](path="/dead")
|
||||||
|
|
||||||
assert result.startswith("Error:")
|
assert result.startswith("Error:")
|
||||||
assert "retry" in result.lower() or "window" in result.lower() or "poll" in result.lower()
|
assert "timed out" in result.lower() or "60 seconds" in result
|
||||||
|
|
||||||
|
|
||||||
# ──────────────────────────────────────────────────────────────────────────
|
# ──────────────────────────────────────────────────────────────────────────
|
||||||
|
|||||||
Reference in New Issue
Block a user