fix: dsm_client — scrub session IDs, fix re-auth race, detect build_stream JSON errors

- Scrub `_sid` from URL in HTTPStatusError messages to prevent session-ID
  leaks into MCP tool responses (C1).
- Re-auth lock now double-checks the SID snapshot to avoid duplicate logins
  on concurrent 106/107/119 responses (M3).
- `trigger_build_stream` again detects immediate JSON error responses from
  DSM (regression from 0.2.6); SSE streams remain fire-and-forget (C2/M8).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-21 09:37:02 +02:00
parent ad199674e7
commit 21fd8e168c
+111 -26
View File
@@ -11,7 +11,9 @@ Thin async client wrapping Synology DSM Web API conventions:
from __future__ import annotations
import asyncio
import json
import logging
import re
import sys
from typing import TYPE_CHECKING, Any
@@ -28,6 +30,19 @@ _SESSION_ERROR_CODES = frozenset({106, 107, 119})
# Parameters to mask in debug logging
_SENSITIVE_PARAMS = frozenset({"passwd", "_sid", "device_id", "otp_code", "device_token"})
# Regex to strip the session-ID value from URL query strings before surfacing
# them in error messages (HTTPStatusError embeds the full request URL).
_SID_QUERY_RE = re.compile(r"(_sid=)[^&\s]*")
def _scrub_url(url: str) -> str:
"""Replace the value of any `_sid=...` query param with `***`.
Used to sanitize URLs embedded in `httpx.HTTPStatusError` messages so the
raw DSM session ID never reaches log output or MCP tool responses.
"""
return _SID_QUERY_RE.sub(r"\1***", url)
class SynologyError(Exception):
"""Raised when DSM API returns a non-success response."""
@@ -185,7 +200,14 @@ class DsmClient:
logger.debug("Querying API info from %s", url)
resp = await http.get(url, params=params)
resp.raise_for_status()
try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
body = resp.json()
if not body.get("success"):
@@ -268,7 +290,14 @@ class DsmClient:
)
resp = await http.get(url, params=req_params)
resp.raise_for_status()
try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
body = resp.json()
if body.get("success"):
@@ -281,13 +310,17 @@ class DsmClient:
# Transparent re-auth on session errors (one retry only)
if code in _SESSION_ERROR_CODES and not _is_retry and self._auth_manager:
old_sid = self._sid
logger.info("Session error %d on %s/%s, re-authenticating...", code, api, method)
async with self._reauth_lock:
self._sid = None
try:
self._sid = await self._auth_manager.login(self)
except Exception as e:
raise SynologyError(f"Re-authentication failed: {e}", code=code) from e
# Double-check: if another task already refreshed the SID while
# we were waiting on the lock, skip the redundant login.
if self._sid == old_sid:
self._sid = None
try:
self._sid = await self._auth_manager.login(self)
except Exception as e:
raise SynologyError(f"Re-authentication failed: {e}", code=code) from e
return await self.request(api, method, version, params, _is_retry=True)
raise SynologyError(_error_message(code, api), code=code)
@@ -348,7 +381,14 @@ class DsmClient:
logger.debug("DSM POST: %s/%s v%d%s", api, method, resolved_version, log_form)
resp = await http.post(url, params=query_params, data=form)
resp.raise_for_status()
try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
body = resp.json()
if body.get("success"):
@@ -363,23 +403,25 @@ class DsmClient:
This is the proper way to force an image pull and project restart in DSM
Container Manager (confirmed via browser DevTools). The endpoint is a
Server-Sent Events (SSE) stream; we send the request and close immediately
without reading any of the response body. DSM starts the build upon
receiving the request and continues server-side regardless of whether the
HTTP connection stays open. Callers should poll SYNO.Docker.Project/list
for the resulting RUNNING status.
Server-Sent Events (SSE) stream on success; we send the request, check
the response headers, and close without consuming the SSE body. DSM
starts the build upon receiving the request and continues server-side
regardless of whether the HTTP connection stays open. Callers should
poll SYNO.Docker.Project/list for the resulting RUNNING status.
Fire-and-forget: we only wait long enough to receive the HTTP response
headers (to detect immediate HTTP-level errors), then close the connection.
We never read SSE events, so this returns in < 10 s regardless of how long
the image pull takes. Claude Desktop's ~4-minute tool-call timeout is
therefore not a concern.
Error detection: DSM signals application-level rejection (e.g. project
locked, invalid id) as an HTTP-200 JSON body `{"success": false, ...}`
rather than as an SSE stream. We inspect the `Content-Type` header and,
when it is `application/json`, read a small capped prefix of the body
to surface the DSM error code immediately instead of forcing the caller
into a multi-minute polling timeout. SSE responses are not read.
Args:
project_id: Project UUID from SYNO.Docker.Project/list.
Raises:
httpx.HTTPStatusError: If the HTTP response status indicates an error.
SynologyError: If DSM rejects the build with a JSON error body, or
if the HTTP response status indicates a transport-level error.
"""
await self._ensure_initialized()
http = self._get_http()
@@ -403,9 +445,10 @@ class DsmClient:
sys.stderr.flush()
logger.debug("build_stream: project_id=%s", project_id)
# Fire-and-forget: open the stream, check HTTP status, close immediately.
# The read timeout only applies to waiting for response *headers*; we never
# read the SSE body, so DSM's streaming cannot block this call indefinitely.
# Fire-and-forget for the SSE body, but detect immediate JSON errors.
# The read timeout only applies to waiting for response *headers* and
# for the (small, capped) JSON error body we read; we never consume SSE
# events, so DSM's streaming cannot block this call indefinitely.
try:
async with http.stream(
"GET",
@@ -413,8 +456,36 @@ class DsmClient:
params=params,
timeout=httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=5.0),
) as resp:
resp.raise_for_status()
# Body intentionally not read. Close context → connection closes.
try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
content_type = resp.headers.get("content-type", "")
if "application/json" in content_type:
# DSM rejected the build — read the JSON error body (capped
# at ~4 KB; DSM error envelopes are tiny).
body = b""
async for chunk in resp.aiter_bytes():
body += chunk
if len(body) >= 4096:
break
try:
parsed = json.loads(body.decode("utf-8", errors="replace"))
except json.JSONDecodeError:
# Malformed response — treat as accepted and let the
# caller's polling surface any real failure.
return
if not parsed.get("success", True):
code = parsed.get("error", {}).get("code", 0)
raise SynologyError(_error_message(code, api), code=code)
# success=true with JSON content-type: odd, treat as accepted.
return
# SSE or anything else → fire-and-forget, close without reading.
except httpx.ReadTimeout:
# Headers not received within 10 s, but the GET request was already
# sent. DSM received it and started the build. Proceed to polling.
@@ -480,7 +551,14 @@ class DsmClient:
files={"file": (filename, encoded, "text/plain")},
timeout=httpx.Timeout(60.0),
)
resp.raise_for_status()
try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
body = resp.json()
if body.get("success"):
@@ -523,7 +601,14 @@ class DsmClient:
logger.debug("DSM GET: %s/download v%d%s", api, resolved_version, log_params)
resp = await http.get(url, params=params, timeout=httpx.Timeout(60.0))
resp.raise_for_status()
try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
content_type = resp.headers.get("content-type", "")
if "application/json" in content_type: