fix: dsm_client — scrub session IDs, fix re-auth race, detect build_stream JSON errors
- Scrub `_sid` from URL in HTTPStatusError messages to prevent session-ID leaks into MCP tool responses (C1). - Re-auth lock now double-checks the SID snapshot to avoid duplicate logins on concurrent 106/107/119 responses (M3). - `trigger_build_stream` again detects immediate JSON error responses from DSM (regression from 0.2.6); SSE streams remain fire-and-forget (C2/M8). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,9 @@ Thin async client wrapping Synology DSM Web API conventions:
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from typing import TYPE_CHECKING, Any
|
||||
|
||||
@@ -28,6 +30,19 @@ _SESSION_ERROR_CODES = frozenset({106, 107, 119})
|
||||
# Parameters to mask in debug logging
|
||||
_SENSITIVE_PARAMS = frozenset({"passwd", "_sid", "device_id", "otp_code", "device_token"})
|
||||
|
||||
# Regex to strip the session-ID value from URL query strings before surfacing
|
||||
# them in error messages (HTTPStatusError embeds the full request URL).
|
||||
_SID_QUERY_RE = re.compile(r"(_sid=)[^&\s]*")
|
||||
|
||||
|
||||
def _scrub_url(url: str) -> str:
|
||||
"""Replace the value of any `_sid=...` query param with `***`.
|
||||
|
||||
Used to sanitize URLs embedded in `httpx.HTTPStatusError` messages so the
|
||||
raw DSM session ID never reaches log output or MCP tool responses.
|
||||
"""
|
||||
return _SID_QUERY_RE.sub(r"\1***", url)
|
||||
|
||||
|
||||
class SynologyError(Exception):
|
||||
"""Raised when DSM API returns a non-success response."""
|
||||
@@ -185,7 +200,14 @@ class DsmClient:
|
||||
|
||||
logger.debug("Querying API info from %s", url)
|
||||
resp = await http.get(url, params=params)
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
url_safe = _scrub_url(str(e.request.url))
|
||||
raise SynologyError(
|
||||
f"HTTP {resp.status_code} from {url_safe}",
|
||||
code=resp.status_code,
|
||||
) from None
|
||||
body = resp.json()
|
||||
|
||||
if not body.get("success"):
|
||||
@@ -268,7 +290,14 @@ class DsmClient:
|
||||
)
|
||||
|
||||
resp = await http.get(url, params=req_params)
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
url_safe = _scrub_url(str(e.request.url))
|
||||
raise SynologyError(
|
||||
f"HTTP {resp.status_code} from {url_safe}",
|
||||
code=resp.status_code,
|
||||
) from None
|
||||
body = resp.json()
|
||||
|
||||
if body.get("success"):
|
||||
@@ -281,8 +310,12 @@ class DsmClient:
|
||||
|
||||
# Transparent re-auth on session errors (one retry only)
|
||||
if code in _SESSION_ERROR_CODES and not _is_retry and self._auth_manager:
|
||||
old_sid = self._sid
|
||||
logger.info("Session error %d on %s/%s, re-authenticating...", code, api, method)
|
||||
async with self._reauth_lock:
|
||||
# Double-check: if another task already refreshed the SID while
|
||||
# we were waiting on the lock, skip the redundant login.
|
||||
if self._sid == old_sid:
|
||||
self._sid = None
|
||||
try:
|
||||
self._sid = await self._auth_manager.login(self)
|
||||
@@ -348,7 +381,14 @@ class DsmClient:
|
||||
logger.debug("DSM POST: %s/%s v%d — %s", api, method, resolved_version, log_form)
|
||||
|
||||
resp = await http.post(url, params=query_params, data=form)
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
url_safe = _scrub_url(str(e.request.url))
|
||||
raise SynologyError(
|
||||
f"HTTP {resp.status_code} from {url_safe}",
|
||||
code=resp.status_code,
|
||||
) from None
|
||||
body = resp.json()
|
||||
|
||||
if body.get("success"):
|
||||
@@ -363,23 +403,25 @@ class DsmClient:
|
||||
|
||||
This is the proper way to force an image pull and project restart in DSM
|
||||
Container Manager (confirmed via browser DevTools). The endpoint is a
|
||||
Server-Sent Events (SSE) stream; we send the request and close immediately
|
||||
without reading any of the response body. DSM starts the build upon
|
||||
receiving the request and continues server-side regardless of whether the
|
||||
HTTP connection stays open. Callers should poll SYNO.Docker.Project/list
|
||||
for the resulting RUNNING status.
|
||||
Server-Sent Events (SSE) stream on success; we send the request, check
|
||||
the response headers, and close without consuming the SSE body. DSM
|
||||
starts the build upon receiving the request and continues server-side
|
||||
regardless of whether the HTTP connection stays open. Callers should
|
||||
poll SYNO.Docker.Project/list for the resulting RUNNING status.
|
||||
|
||||
Fire-and-forget: we only wait long enough to receive the HTTP response
|
||||
headers (to detect immediate HTTP-level errors), then close the connection.
|
||||
We never read SSE events, so this returns in < 10 s regardless of how long
|
||||
the image pull takes. Claude Desktop's ~4-minute tool-call timeout is
|
||||
therefore not a concern.
|
||||
Error detection: DSM signals application-level rejection (e.g. project
|
||||
locked, invalid id) as an HTTP-200 JSON body `{"success": false, ...}`
|
||||
rather than as an SSE stream. We inspect the `Content-Type` header and,
|
||||
when it is `application/json`, read a small capped prefix of the body
|
||||
to surface the DSM error code immediately instead of forcing the caller
|
||||
into a multi-minute polling timeout. SSE responses are not read.
|
||||
|
||||
Args:
|
||||
project_id: Project UUID from SYNO.Docker.Project/list.
|
||||
|
||||
Raises:
|
||||
httpx.HTTPStatusError: If the HTTP response status indicates an error.
|
||||
SynologyError: If DSM rejects the build with a JSON error body, or
|
||||
if the HTTP response status indicates a transport-level error.
|
||||
"""
|
||||
await self._ensure_initialized()
|
||||
http = self._get_http()
|
||||
@@ -403,9 +445,10 @@ class DsmClient:
|
||||
sys.stderr.flush()
|
||||
logger.debug("build_stream: project_id=%s", project_id)
|
||||
|
||||
# Fire-and-forget: open the stream, check HTTP status, close immediately.
|
||||
# The read timeout only applies to waiting for response *headers*; we never
|
||||
# read the SSE body, so DSM's streaming cannot block this call indefinitely.
|
||||
# Fire-and-forget for the SSE body, but detect immediate JSON errors.
|
||||
# The read timeout only applies to waiting for response *headers* and
|
||||
# for the (small, capped) JSON error body we read; we never consume SSE
|
||||
# events, so DSM's streaming cannot block this call indefinitely.
|
||||
try:
|
||||
async with http.stream(
|
||||
"GET",
|
||||
@@ -413,8 +456,36 @@ class DsmClient:
|
||||
params=params,
|
||||
timeout=httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=5.0),
|
||||
) as resp:
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
# Body intentionally not read. Close context → connection closes.
|
||||
except httpx.HTTPStatusError as e:
|
||||
url_safe = _scrub_url(str(e.request.url))
|
||||
raise SynologyError(
|
||||
f"HTTP {resp.status_code} from {url_safe}",
|
||||
code=resp.status_code,
|
||||
) from None
|
||||
|
||||
content_type = resp.headers.get("content-type", "")
|
||||
if "application/json" in content_type:
|
||||
# DSM rejected the build — read the JSON error body (capped
|
||||
# at ~4 KB; DSM error envelopes are tiny).
|
||||
body = b""
|
||||
async for chunk in resp.aiter_bytes():
|
||||
body += chunk
|
||||
if len(body) >= 4096:
|
||||
break
|
||||
try:
|
||||
parsed = json.loads(body.decode("utf-8", errors="replace"))
|
||||
except json.JSONDecodeError:
|
||||
# Malformed response — treat as accepted and let the
|
||||
# caller's polling surface any real failure.
|
||||
return
|
||||
if not parsed.get("success", True):
|
||||
code = parsed.get("error", {}).get("code", 0)
|
||||
raise SynologyError(_error_message(code, api), code=code)
|
||||
# success=true with JSON content-type: odd, treat as accepted.
|
||||
return
|
||||
# SSE or anything else → fire-and-forget, close without reading.
|
||||
except httpx.ReadTimeout:
|
||||
# Headers not received within 10 s, but the GET request was already
|
||||
# sent. DSM received it and started the build. Proceed to polling.
|
||||
@@ -480,7 +551,14 @@ class DsmClient:
|
||||
files={"file": (filename, encoded, "text/plain")},
|
||||
timeout=httpx.Timeout(60.0),
|
||||
)
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
url_safe = _scrub_url(str(e.request.url))
|
||||
raise SynologyError(
|
||||
f"HTTP {resp.status_code} from {url_safe}",
|
||||
code=resp.status_code,
|
||||
) from None
|
||||
body = resp.json()
|
||||
|
||||
if body.get("success"):
|
||||
@@ -523,7 +601,14 @@ class DsmClient:
|
||||
logger.debug("DSM GET: %s/download v%d — %s", api, resolved_version, log_params)
|
||||
|
||||
resp = await http.get(url, params=params, timeout=httpx.Timeout(60.0))
|
||||
try:
|
||||
resp.raise_for_status()
|
||||
except httpx.HTTPStatusError as e:
|
||||
url_safe = _scrub_url(str(e.request.url))
|
||||
raise SynologyError(
|
||||
f"HTTP {resp.status_code} from {url_safe}",
|
||||
code=resp.status_code,
|
||||
) from None
|
||||
|
||||
content_type = resp.headers.get("content-type", "")
|
||||
if "application/json" in content_type:
|
||||
|
||||
Reference in New Issue
Block a user