fix: _poll_oneshot for DirSize/MD5 with burst-retry on early 599

Add FileStationClient.start_and_poll_immediately: starts the async task and
immediately makes the first status poll within the same method, with no
intermediate awaits other than the two HTTP calls. This minimises scheduler
latency between start and first poll for one-shot tasks.

_poll_oneshot now takes the first_status returned by start_and_poll_immediately and handles three cases:
- finished=True on first poll → return immediately
- finished=False → Phase 2 (exponential backoff, 60 s timeout)
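- None (first poll was 599) → burst-retry up to 10× at 10 ms intervals, then Phase 2
  (Phase 2 tolerates 599 responses until the task has been seen alive; a 599
  after that point means the result window closed, so it fails fast with a retry message)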

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-14 13:50:28 +02:00
parent 0e8ffaa6df
commit 62f8e41931
5 changed files with 128 additions and 80 deletions
@@ -123,58 +123,53 @@ def register_filestation(
api: str,
version: int,
taskid: str,
first_status: dict[str, Any] | None,
) -> tuple[bool, dict[str, Any] | str]:
"""Poll a one-shot DSM task (DirSize, MD5).
"""Continue polling a one-shot DSM task after the first status poll.
One-shot tasks deliver ``finished=True`` exactly once; after that,
status polls return 599. Two phases:
Called after ``client.start_and_poll_immediately`` has already made
the first status request. Handles three outcomes for ``first_status``:
Phase 1 — burst: polls immediately, then up to 10 times at 50 ms
intervals. This catches tasks that complete in under ~500 ms.
If all burst polls return 599, the result window was missed.
Phase 2 — normal: entered only after receiving ``finished=False``
(task confirmed running). Exponential backoff up to 60 s. A 599 in
this phase means the window closed before we polled — fail fast.
* ``finished=True`` — return immediately (task done on first poll).
* ``finished=False`` — task confirmed running; enter Phase 2
(exponential backoff until ``finished=True`` or 60 s timeout).
* ``None`` (first poll returned 599) — burst-retry 10× at 10 ms,
then enter Phase 2 regardless (large directories will eventually
return ``finished=False``; a 599 after the task was seen alive
means the window closed — fail fast with a retry message).
Returns:
``(True, status_dict)`` on success, or ``(False, "Error: …")``
on DSM error, missed window, or timeout.
on DSM error or timeout.
"""
from mcp_synology_filestation.client import SynologyError as _SynologyError
burst_count = 10
burst_interval = 0.05 # 50 ms between burst retries
seen_alive = False
# ── Phase 1: burst ────────────────────────────────────────────────
for attempt in range(burst_count + 1):
if attempt > 0:
await asyncio.sleep(burst_interval)
try:
status_data = await client.request(
api,
"status",
version=version,
params={"taskid": taskid},
)
except _SynologyError as e:
if e.code != 599:
return False, f"Error: {e}"
continue # 599 — task not visible yet, keep bursting
if status_data.get("finished"):
return True, status_data
break # finished=False — task confirmed running, enter Phase 2
if first_status is not None:
if first_status.get("finished"):
return True, first_status
seen_alive = True # finished=False: task is running
else:
# All burst polls returned 599 — one-shot window was missed
return (
False,
"Error: Could not read task result — the operation finished"
" before the first successful poll. Please retry.",
)
# 599 on the immediate poll: burst-retry (10×, 10 ms apart)
for _ in range(10):
await asyncio.sleep(0.01)
try:
s = await client.request(
api, "status", version=version, params={"taskid": taskid}
)
except _SynologyError as e:
if e.code == 599:
continue
return False, f"Error: {e}"
if s.get("finished"):
return True, s
seen_alive = True
break # finished=False: enter Phase 2
# ── Phase 2: exponential backoff ──────────────────────────────────
# ── Phase 2: exponential backoff until finished or 60 s timeout ──
delay = 0.2
elapsed = burst_count * burst_interval # time already spent in burst
elapsed = 0.0
timeout = 60.0
while True:
@@ -183,23 +178,23 @@ def register_filestation(
delay = min(delay * 2, 2.0)
try:
status_data = await client.request(
api,
"status",
version=version,
params={"taskid": taskid},
)
s = await client.request(api, "status", version=version, params={"taskid": taskid})
except _SynologyError as e:
if e.code == 599:
return (
False,
"Error: Could not read task result — the operation finished"
" before the result was polled. Please retry.",
)
return False, f"Error: {e}"
if status_data.get("finished"):
return True, status_data
if seen_alive:
# Task was running but the one-shot window closed before we read it
return (
False,
"Error: Could not read task result — the operation finished"
" before the result was polled. Please retry.",
)
# Not yet seen alive: large dir still initialising, keep polling
else:
return False, f"Error: {e}"
else:
seen_alive = True
if s.get("finished"):
return True, s
if elapsed >= timeout:
return (
@@ -895,20 +890,16 @@ def register_filestation(
return "Error: no path provided."
try:
start_data = await client.request(
taskid, first_status = await client.start_and_poll_immediately(
"SYNO.FileStation.DirSize",
"start",
version=2,
params={"path": json.dumps(paths)},
start_params={"path": json.dumps(paths)},
poll_version=1,
start_version=2,
)
except SynologyError as e:
return f"Error: {e}"
taskid: str = start_data.get("taskid", "")
if not taskid:
return "Error: DSM did not return a task ID."
ok, result = await _poll_oneshot("SYNO.FileStation.DirSize", 1, taskid)
ok, result = await _poll_oneshot("SYNO.FileStation.DirSize", 1, taskid, first_status)
if not ok:
return result # type: ignore[return-value]
@@ -956,20 +947,16 @@ def register_filestation(
from mcp_synology_filestation.client import SynologyError
try:
start_data = await client.request(
taskid, first_status = await client.start_and_poll_immediately(
"SYNO.FileStation.MD5",
"start",
version=2,
params={"file_path": json.dumps(path)},
start_params={"file_path": json.dumps(path)},
poll_version=1,
start_version=2,
)
except SynologyError as e:
return f"Error: {e}"
taskid: str = start_data.get("taskid", "")
if not taskid:
return "Error: DSM did not return a task ID."
ok, result = await _poll_oneshot("SYNO.FileStation.MD5", 1, taskid)
ok, result = await _poll_oneshot("SYNO.FileStation.MD5", 1, taskid, first_status)
if not ok:
return result # type: ignore[return-value]