fix: dsm_client — scrub session IDs, fix re-auth race, detect build_stream JSON errors

- Scrub `_sid` from URL in HTTPStatusError messages to prevent session-ID
  leaks into MCP tool responses (C1).
- Re-auth lock now double-checks the SID snapshot to avoid duplicate logins
  on concurrent 106/107/119 responses (M3).
- `trigger_build_stream` again detects immediate JSON error responses from
  DSM (regression from 0.2.6); SSE streams remain fire-and-forget (C2/M8).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-21 09:37:02 +02:00
parent ad199674e7
commit 21fd8e168c
+111 -26
View File
@@ -11,7 +11,9 @@ Thin async client wrapping Synology DSM Web API conventions:
from __future__ import annotations from __future__ import annotations
import asyncio import asyncio
import json
import logging import logging
import re
import sys import sys
from typing import TYPE_CHECKING, Any from typing import TYPE_CHECKING, Any
@@ -28,6 +30,19 @@ _SESSION_ERROR_CODES = frozenset({106, 107, 119})
# Parameters to mask in debug logging # Parameters to mask in debug logging
_SENSITIVE_PARAMS = frozenset({"passwd", "_sid", "device_id", "otp_code", "device_token"}) _SENSITIVE_PARAMS = frozenset({"passwd", "_sid", "device_id", "otp_code", "device_token"})
# Regex to strip the session-ID value from URL query strings before surfacing
# them in error messages (HTTPStatusError embeds the full request URL).
_SID_QUERY_RE = re.compile(r"(_sid=)[^&\s]*")
def _scrub_url(url: str) -> str:
"""Replace the value of any `_sid=...` query param with `***`.
Used to sanitize URLs embedded in `httpx.HTTPStatusError` messages so the
raw DSM session ID never reaches log output or MCP tool responses.
"""
return _SID_QUERY_RE.sub(r"\1***", url)
class SynologyError(Exception): class SynologyError(Exception):
"""Raised when DSM API returns a non-success response.""" """Raised when DSM API returns a non-success response."""
@@ -185,7 +200,14 @@ class DsmClient:
logger.debug("Querying API info from %s", url) logger.debug("Querying API info from %s", url)
resp = await http.get(url, params=params) resp = await http.get(url, params=params)
resp.raise_for_status() try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
body = resp.json() body = resp.json()
if not body.get("success"): if not body.get("success"):
@@ -268,7 +290,14 @@ class DsmClient:
) )
resp = await http.get(url, params=req_params) resp = await http.get(url, params=req_params)
resp.raise_for_status() try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
body = resp.json() body = resp.json()
if body.get("success"): if body.get("success"):
@@ -281,13 +310,17 @@ class DsmClient:
# Transparent re-auth on session errors (one retry only) # Transparent re-auth on session errors (one retry only)
if code in _SESSION_ERROR_CODES and not _is_retry and self._auth_manager: if code in _SESSION_ERROR_CODES and not _is_retry and self._auth_manager:
old_sid = self._sid
logger.info("Session error %d on %s/%s, re-authenticating...", code, api, method) logger.info("Session error %d on %s/%s, re-authenticating...", code, api, method)
async with self._reauth_lock: async with self._reauth_lock:
self._sid = None # Double-check: if another task already refreshed the SID while
try: # we were waiting on the lock, skip the redundant login.
self._sid = await self._auth_manager.login(self) if self._sid == old_sid:
except Exception as e: self._sid = None
raise SynologyError(f"Re-authentication failed: {e}", code=code) from e try:
self._sid = await self._auth_manager.login(self)
except Exception as e:
raise SynologyError(f"Re-authentication failed: {e}", code=code) from e
return await self.request(api, method, version, params, _is_retry=True) return await self.request(api, method, version, params, _is_retry=True)
raise SynologyError(_error_message(code, api), code=code) raise SynologyError(_error_message(code, api), code=code)
@@ -348,7 +381,14 @@ class DsmClient:
logger.debug("DSM POST: %s/%s v%d%s", api, method, resolved_version, log_form) logger.debug("DSM POST: %s/%s v%d%s", api, method, resolved_version, log_form)
resp = await http.post(url, params=query_params, data=form) resp = await http.post(url, params=query_params, data=form)
resp.raise_for_status() try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
body = resp.json() body = resp.json()
if body.get("success"): if body.get("success"):
@@ -363,23 +403,25 @@ class DsmClient:
This is the proper way to force an image pull and project restart in DSM This is the proper way to force an image pull and project restart in DSM
Container Manager (confirmed via browser DevTools). The endpoint is a Container Manager (confirmed via browser DevTools). The endpoint is a
Server-Sent Events (SSE) stream; we send the request and close immediately Server-Sent Events (SSE) stream on success; we send the request, check
without reading any of the response body. DSM starts the build upon the response headers, and close without consuming the SSE body. DSM
receiving the request and continues server-side regardless of whether the starts the build upon receiving the request and continues server-side
HTTP connection stays open. Callers should poll SYNO.Docker.Project/list regardless of whether the HTTP connection stays open. Callers should
for the resulting RUNNING status. poll SYNO.Docker.Project/list for the resulting RUNNING status.
Fire-and-forget: we only wait long enough to receive the HTTP response Error detection: DSM signals application-level rejection (e.g. project
headers (to detect immediate HTTP-level errors), then close the connection. locked, invalid id) as an HTTP-200 JSON body `{"success": false, ...}`
We never read SSE events, so this returns in < 10 s regardless of how long rather than as an SSE stream. We inspect the `Content-Type` header and,
the image pull takes. Claude Desktop's ~4-minute tool-call timeout is when it is `application/json`, read a small capped prefix of the body
therefore not a concern. to surface the DSM error code immediately instead of forcing the caller
into a multi-minute polling timeout. SSE responses are not read.
Args: Args:
project_id: Project UUID from SYNO.Docker.Project/list. project_id: Project UUID from SYNO.Docker.Project/list.
Raises: Raises:
httpx.HTTPStatusError: If the HTTP response status indicates an error. SynologyError: If DSM rejects the build with a JSON error body, or
if the HTTP response status indicates a transport-level error.
""" """
await self._ensure_initialized() await self._ensure_initialized()
http = self._get_http() http = self._get_http()
@@ -403,9 +445,10 @@ class DsmClient:
sys.stderr.flush() sys.stderr.flush()
logger.debug("build_stream: project_id=%s", project_id) logger.debug("build_stream: project_id=%s", project_id)
# Fire-and-forget: open the stream, check HTTP status, close immediately. # Fire-and-forget for the SSE body, but detect immediate JSON errors.
# The read timeout only applies to waiting for response *headers*; we never # The read timeout only applies to waiting for response *headers* and
# read the SSE body, so DSM's streaming cannot block this call indefinitely. # for the (small, capped) JSON error body we read; we never consume SSE
# events, so DSM's streaming cannot block this call indefinitely.
try: try:
async with http.stream( async with http.stream(
"GET", "GET",
@@ -413,8 +456,36 @@ class DsmClient:
params=params, params=params,
timeout=httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=5.0), timeout=httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=5.0),
) as resp: ) as resp:
resp.raise_for_status() try:
# Body intentionally not read. Close context → connection closes. resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
content_type = resp.headers.get("content-type", "")
if "application/json" in content_type:
# DSM rejected the build — read the JSON error body (capped
# at ~4 KB; DSM error envelopes are tiny).
body = b""
async for chunk in resp.aiter_bytes():
body += chunk
if len(body) >= 4096:
break
try:
parsed = json.loads(body.decode("utf-8", errors="replace"))
except json.JSONDecodeError:
# Malformed response — treat as accepted and let the
# caller's polling surface any real failure.
return
if not parsed.get("success", True):
code = parsed.get("error", {}).get("code", 0)
raise SynologyError(_error_message(code, api), code=code)
# success=true with JSON content-type: odd, treat as accepted.
return
# SSE or anything else → fire-and-forget, close without reading.
except httpx.ReadTimeout: except httpx.ReadTimeout:
# Headers not received within 10 s, but the GET request was already # Headers not received within 10 s, but the GET request was already
# sent. DSM received it and started the build. Proceed to polling. # sent. DSM received it and started the build. Proceed to polling.
@@ -480,7 +551,14 @@ class DsmClient:
files={"file": (filename, encoded, "text/plain")}, files={"file": (filename, encoded, "text/plain")},
timeout=httpx.Timeout(60.0), timeout=httpx.Timeout(60.0),
) )
resp.raise_for_status() try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
body = resp.json() body = resp.json()
if body.get("success"): if body.get("success"):
@@ -523,7 +601,14 @@ class DsmClient:
logger.debug("DSM GET: %s/download v%d%s", api, resolved_version, log_params) logger.debug("DSM GET: %s/download v%d%s", api, resolved_version, log_params)
resp = await http.get(url, params=params, timeout=httpx.Timeout(60.0)) resp = await http.get(url, params=params, timeout=httpx.Timeout(60.0))
resp.raise_for_status() try:
resp.raise_for_status()
except httpx.HTTPStatusError as e:
url_safe = _scrub_url(str(e.request.url))
raise SynologyError(
f"HTTP {resp.status_code} from {url_safe}",
code=resp.status_code,
) from None
content_type = resp.headers.get("content-type", "") content_type = resp.headers.get("content-type", "")
if "application/json" in content_type: if "application/json" in content_type: