fix: retry _poll_task on transient 599 instead of aborting immediately

DirSize for large directories (e.g. /docker, 8441 folders, 46832 files)
takes ~800ms to compute. While the task is running, status returns
intermediate progress (finished=false). On the very first poll, however,
status can transiently return error 599 (task just started, not yet
available). Previously _poll_task caught any SynologyError and returned
immediately, so dir_size failed as soon as a poll returned 599.

Fix: treat 599 as a transient condition and continue polling. Give up
only after 5 consecutive 599 responses. All other error codes remain
immediately fatal.

Confirmed by investigation with test_dirsize_md5.py:
- /test-mcp (2937 B): finished=true at 0ms
- /docker (3.9 GB, 46832 files): finished=false at 35ms, finished=true at 789ms

Tests: 2 new cases (retry-succeeds, 5x-599-gives-up) → 95 total

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-14 13:01:27 +02:00
parent 4d8eae752d
commit e3fa71b458
5 changed files with 113 additions and 104 deletions
+55 -98
View File
@@ -1,19 +1,5 @@
"""Wegwerfskript: DirSize + MD5 direkt gegen die NAS testen.
Finale Befunde:
DirSize: start v2 (path als plain string ODER JSON-Array), status v1 (0ms delay)
MD5: start v2, status v1 (0ms delay, ONE-SHOT)
DirSize status-Response Felder:
finished: bool
num_dir: int (Anzahl Unterordner)
num_file: int (Anzahl Dateien)
total_size: int (Gesamtgroesse in Bytes)
MD5 status-Response Felder:
finished: bool
md5: str (Hex-String, 32 Zeichen)
Ausfuehren: uv run python test_dirsize_md5.py
"""
@@ -27,13 +13,13 @@ from mcp_synology_filestation.auth import AuthManager
from mcp_synology_filestation.client import FileStationClient
from mcp_synology_filestation.config import load_config
DIRSIZE_PATH = "/test-mcp"
DIRSIZE_PATHS = ["/test-mcp", "/docker"]
MD5_PATH = "/test-mcp/test.zip"
def pp(label: str, data: object, elapsed_ms: float | None = None) -> None:
print(f"\n{'='*60}")
suffix = f" [{elapsed_ms:.1f} ms nach start]" if elapsed_ms is not None else ""
suffix = f" [{elapsed_ms:.1f} ms]" if elapsed_ms is not None else ""
print(f" {label}{suffix}")
print("=" * 60)
print(json.dumps(data, indent=2, ensure_ascii=False))
@@ -48,87 +34,67 @@ async def raw(http: httpx.AsyncClient, url: str, sid: str, **params) -> dict:
return {"_raw": r.text[:300], "_http_status": r.status_code}
async def probe_dirsize_path_variants(
http: httpx.AsyncClient, sid: str, api_url: str
async def probe_dirsize_long(
http: httpx.AsyncClient, sid: str, api_url: str, path: str
) -> None:
"""Test all three path encoding variants for DirSize start."""
"""Start DirSize and poll v1 every 200ms for up to 15s.
Goal: find out if 599 means 'task still running' (keep polling)
or 'task gone' (give up). If the task eventually returns data,
599 = 'not ready yet'. If it never returns data, 599 = 'task gone'.
"""
print(f"\n{'#'*60}")
print(" DIRSIZE path-Varianten (a=plain, b=json.dumps, c=manuell)")
print(f"{'#'*60}")
variants = [
("a) plain string", DIRSIZE_PATH),
("b) json.dumps([path])", json.dumps([DIRSIZE_PATH])),
("c) json.dumps(path)", json.dumps(DIRSIZE_PATH)),
]
for label, path_val in variants:
print(f"\n--- Variante {label} ---")
print(f" path={path_val!r}")
t0 = time.perf_counter()
start_body = await raw(
http, api_url, sid,
api="SYNO.FileStation.DirSize", version="2", method="start",
path=path_val,
)
pp(f"DirSize::start [{label}]", start_body, (time.perf_counter() - t0) * 1000)
taskid = (start_body.get("data") or {}).get("taskid")
if not taskid:
print(" => Kein taskid (start fehlgeschlagen)")
continue
# Poll immediately
t1 = time.perf_counter()
status_body = await raw(
http, api_url, sid,
api="SYNO.FileStation.DirSize", version="1", method="status",
taskid=taskid,
)
pp(f"DirSize::status [{label}]", status_body, (t1 - t0) * 1000)
data = (status_body.get("data") or {})
if data.get("finished"):
print(f" => OK: num_dir={data.get('num_dir')} "
f"num_file={data.get('num_file')} "
f"total_size={data.get('total_size')}")
else:
code = (status_body.get("error") or {}).get("code", "?")
print(f" => FEHLER code={code}")
async def probe_md5(http: httpx.AsyncClient, sid: str, api_url: str) -> None:
"""Test MD5 with status v1 at 0ms (correct settings)."""
print(f"\n{'#'*60}")
print(" MD5 -- start v2, status v1, 0ms delay")
print(f" DIRSIZE {path} — long poll (15s, every 200ms)")
print(f"{'#'*60}")
t0 = time.perf_counter()
start_body = await raw(
http, api_url, sid,
api="SYNO.FileStation.MD5", version="2", method="start",
file_path=json.dumps(MD5_PATH),
api="SYNO.FileStation.DirSize", version="2", method="start",
path=json.dumps([path]),
)
pp("MD5::start", start_body, (time.perf_counter() - t0) * 1000)
elapsed_start = (time.perf_counter() - t0) * 1000
pp(f"DirSize::start ({path})", start_body, elapsed_start)
taskid = (start_body.get("data") or {}).get("taskid")
if not taskid:
print("[!] No taskid")
print("[!] No taskid.")
return
t1 = time.perf_counter()
r = await raw(
http, api_url, sid,
api="SYNO.FileStation.MD5", version="1", method="status",
taskid=taskid,
)
pp("MD5::status v1 [0ms]", r, (t1 - t0) * 1000)
data = (r.get("data") or {})
if data.get("finished"):
print(f" => OK: md5={data.get('md5')}")
else:
print(f" => FEHLER: {r}")
print(f"\n[*] Polling status v1 every 200ms for up to 15s (taskid={taskid[:12]}...)")
for attempt in range(75): # 75 * 200ms = 15s
if attempt > 0:
await asyncio.sleep(0.2)
t = time.perf_counter()
r = await raw(
http, api_url, sid,
api="SYNO.FileStation.DirSize", version="1", method="status",
taskid=taskid,
)
elapsed = (t - t0) * 1000
success = r.get("success")
data = (r.get("data") or {})
finished = data.get("finished")
error_code = (r.get("error") or {}).get("code")
if finished:
print(f" [{elapsed:.0f}ms] attempt {attempt+1}: FERTIG! "
f"num_dir={data.get('num_dir')} "
f"num_file={data.get('num_file')} "
f"total_size={data.get('total_size')}")
pp(f"DirSize::status final ({path})", r, elapsed)
return
elif success and not finished:
# Still running — show current progress
print(f" [{elapsed:.0f}ms] attempt {attempt+1}: running... "
f"num_dir={data.get('num_dir', '?')} "
f"num_file={data.get('num_file', '?')} "
f"total_size={data.get('total_size', '?')}")
else:
print(f" [{elapsed:.0f}ms] attempt {attempt+1}: error code={error_code}")
# Continue polling — 599 might mean 'not ready yet'
print(f"\n[!] No result after 15s — task never returned data.")
async def main() -> None:
@@ -141,22 +107,13 @@ async def main() -> None:
sid = client.sid
base = config.base_url
for api_name in ["SYNO.FileStation.DirSize", "SYNO.FileStation.MD5"]:
info = client._api_cache.get(api_name) # noqa: SLF001
if info:
print(f"[*] {api_name}: path={info['path']} "
f"v{info['minVersion']}-v{info['maxVersion']}")
else:
print(f"[!] {api_name}: NOT in API cache!")
dirsize_info = client._api_cache.get("SYNO.FileStation.DirSize", {}) # noqa: SLF001
md5_info = client._api_cache.get("SYNO.FileStation.MD5", {}) # noqa: SLF001
dirsize_url = f"{base}/webapi/{dirsize_info.get('path', 'entry.cgi')}"
md5_url = f"{base}/webapi/{md5_info.get('path', 'entry.cgi')}"
info = client._api_cache.get("SYNO.FileStation.DirSize", {}) # noqa: SLF001
api_url = f"{base}/webapi/{info.get('path', 'entry.cgi')}"
print(f"[*] DirSize API: {api_url} v{info.get('minVersion')}-v{info.get('maxVersion')}")
async with httpx.AsyncClient(verify=config.connection.verify_ssl, timeout=30.0) as http:
await probe_dirsize_path_variants(http, sid, dirsize_url)
await probe_md5(http, sid, md5_url)
for path in DIRSIZE_PATHS:
await probe_dirsize_long(http, sid, api_url, path)
await auth.logout(client)
print("\n[*] Logout OK.")