fix: dsm_client — scrub session IDs, fix re-auth race, detect build_stream JSON errors
- Scrub `_sid` from URL in HTTPStatusError messages to prevent session-ID leaks into MCP tool responses (C1). - Re-auth lock now double-checks the SID snapshot to avoid duplicate logins on concurrent 106/107/119 responses (M3). - `trigger_build_stream` again detects immediate JSON error responses from DSM (regression from 0.2.6); SSE streams remain fire-and-forget (C2/M8). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -11,7 +11,9 @@ Thin async client wrapping Synology DSM Web API conventions:
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
|
|
||||||
@@ -28,6 +30,19 @@ _SESSION_ERROR_CODES = frozenset({106, 107, 119})
|
|||||||
# Parameters to mask in debug logging
|
# Parameters to mask in debug logging
|
||||||
_SENSITIVE_PARAMS = frozenset({"passwd", "_sid", "device_id", "otp_code", "device_token"})
|
_SENSITIVE_PARAMS = frozenset({"passwd", "_sid", "device_id", "otp_code", "device_token"})
|
||||||
|
|
||||||
|
# Regex to strip the session-ID value from URL query strings before surfacing
|
||||||
|
# them in error messages (HTTPStatusError embeds the full request URL).
|
||||||
|
_SID_QUERY_RE = re.compile(r"(_sid=)[^&\s]*")
|
||||||
|
|
||||||
|
|
||||||
|
def _scrub_url(url: str) -> str:
|
||||||
|
"""Replace the value of any `_sid=...` query param with `***`.
|
||||||
|
|
||||||
|
Used to sanitize URLs embedded in `httpx.HTTPStatusError` messages so the
|
||||||
|
raw DSM session ID never reaches log output or MCP tool responses.
|
||||||
|
"""
|
||||||
|
return _SID_QUERY_RE.sub(r"\1***", url)
|
||||||
|
|
||||||
|
|
||||||
class SynologyError(Exception):
|
class SynologyError(Exception):
|
||||||
"""Raised when DSM API returns a non-success response."""
|
"""Raised when DSM API returns a non-success response."""
|
||||||
@@ -185,7 +200,14 @@ class DsmClient:
|
|||||||
|
|
||||||
logger.debug("Querying API info from %s", url)
|
logger.debug("Querying API info from %s", url)
|
||||||
resp = await http.get(url, params=params)
|
resp = await http.get(url, params=params)
|
||||||
|
try:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
url_safe = _scrub_url(str(e.request.url))
|
||||||
|
raise SynologyError(
|
||||||
|
f"HTTP {resp.status_code} from {url_safe}",
|
||||||
|
code=resp.status_code,
|
||||||
|
) from None
|
||||||
body = resp.json()
|
body = resp.json()
|
||||||
|
|
||||||
if not body.get("success"):
|
if not body.get("success"):
|
||||||
@@ -268,7 +290,14 @@ class DsmClient:
|
|||||||
)
|
)
|
||||||
|
|
||||||
resp = await http.get(url, params=req_params)
|
resp = await http.get(url, params=req_params)
|
||||||
|
try:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
url_safe = _scrub_url(str(e.request.url))
|
||||||
|
raise SynologyError(
|
||||||
|
f"HTTP {resp.status_code} from {url_safe}",
|
||||||
|
code=resp.status_code,
|
||||||
|
) from None
|
||||||
body = resp.json()
|
body = resp.json()
|
||||||
|
|
||||||
if body.get("success"):
|
if body.get("success"):
|
||||||
@@ -281,8 +310,12 @@ class DsmClient:
|
|||||||
|
|
||||||
# Transparent re-auth on session errors (one retry only)
|
# Transparent re-auth on session errors (one retry only)
|
||||||
if code in _SESSION_ERROR_CODES and not _is_retry and self._auth_manager:
|
if code in _SESSION_ERROR_CODES and not _is_retry and self._auth_manager:
|
||||||
|
old_sid = self._sid
|
||||||
logger.info("Session error %d on %s/%s, re-authenticating...", code, api, method)
|
logger.info("Session error %d on %s/%s, re-authenticating...", code, api, method)
|
||||||
async with self._reauth_lock:
|
async with self._reauth_lock:
|
||||||
|
# Double-check: if another task already refreshed the SID while
|
||||||
|
# we were waiting on the lock, skip the redundant login.
|
||||||
|
if self._sid == old_sid:
|
||||||
self._sid = None
|
self._sid = None
|
||||||
try:
|
try:
|
||||||
self._sid = await self._auth_manager.login(self)
|
self._sid = await self._auth_manager.login(self)
|
||||||
@@ -348,7 +381,14 @@ class DsmClient:
|
|||||||
logger.debug("DSM POST: %s/%s v%d — %s", api, method, resolved_version, log_form)
|
logger.debug("DSM POST: %s/%s v%d — %s", api, method, resolved_version, log_form)
|
||||||
|
|
||||||
resp = await http.post(url, params=query_params, data=form)
|
resp = await http.post(url, params=query_params, data=form)
|
||||||
|
try:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
url_safe = _scrub_url(str(e.request.url))
|
||||||
|
raise SynologyError(
|
||||||
|
f"HTTP {resp.status_code} from {url_safe}",
|
||||||
|
code=resp.status_code,
|
||||||
|
) from None
|
||||||
body = resp.json()
|
body = resp.json()
|
||||||
|
|
||||||
if body.get("success"):
|
if body.get("success"):
|
||||||
@@ -363,23 +403,25 @@ class DsmClient:
|
|||||||
|
|
||||||
This is the proper way to force an image pull and project restart in DSM
|
This is the proper way to force an image pull and project restart in DSM
|
||||||
Container Manager (confirmed via browser DevTools). The endpoint is a
|
Container Manager (confirmed via browser DevTools). The endpoint is a
|
||||||
Server-Sent Events (SSE) stream; we send the request and close immediately
|
Server-Sent Events (SSE) stream on success; we send the request, check
|
||||||
without reading any of the response body. DSM starts the build upon
|
the response headers, and close without consuming the SSE body. DSM
|
||||||
receiving the request and continues server-side regardless of whether the
|
starts the build upon receiving the request and continues server-side
|
||||||
HTTP connection stays open. Callers should poll SYNO.Docker.Project/list
|
regardless of whether the HTTP connection stays open. Callers should
|
||||||
for the resulting RUNNING status.
|
poll SYNO.Docker.Project/list for the resulting RUNNING status.
|
||||||
|
|
||||||
Fire-and-forget: we only wait long enough to receive the HTTP response
|
Error detection: DSM signals application-level rejection (e.g. project
|
||||||
headers (to detect immediate HTTP-level errors), then close the connection.
|
locked, invalid id) as an HTTP-200 JSON body `{"success": false, ...}`
|
||||||
We never read SSE events, so this returns in < 10 s regardless of how long
|
rather than as an SSE stream. We inspect the `Content-Type` header and,
|
||||||
the image pull takes. Claude Desktop's ~4-minute tool-call timeout is
|
when it is `application/json`, read a small capped prefix of the body
|
||||||
therefore not a concern.
|
to surface the DSM error code immediately instead of forcing the caller
|
||||||
|
into a multi-minute polling timeout. SSE responses are not read.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
project_id: Project UUID from SYNO.Docker.Project/list.
|
project_id: Project UUID from SYNO.Docker.Project/list.
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
httpx.HTTPStatusError: If the HTTP response status indicates an error.
|
SynologyError: If DSM rejects the build with a JSON error body, or
|
||||||
|
if the HTTP response status indicates a transport-level error.
|
||||||
"""
|
"""
|
||||||
await self._ensure_initialized()
|
await self._ensure_initialized()
|
||||||
http = self._get_http()
|
http = self._get_http()
|
||||||
@@ -403,9 +445,10 @@ class DsmClient:
|
|||||||
sys.stderr.flush()
|
sys.stderr.flush()
|
||||||
logger.debug("build_stream: project_id=%s", project_id)
|
logger.debug("build_stream: project_id=%s", project_id)
|
||||||
|
|
||||||
# Fire-and-forget: open the stream, check HTTP status, close immediately.
|
# Fire-and-forget for the SSE body, but detect immediate JSON errors.
|
||||||
# The read timeout only applies to waiting for response *headers*; we never
|
# The read timeout only applies to waiting for response *headers* and
|
||||||
# read the SSE body, so DSM's streaming cannot block this call indefinitely.
|
# for the (small, capped) JSON error body we read; we never consume SSE
|
||||||
|
# events, so DSM's streaming cannot block this call indefinitely.
|
||||||
try:
|
try:
|
||||||
async with http.stream(
|
async with http.stream(
|
||||||
"GET",
|
"GET",
|
||||||
@@ -413,8 +456,36 @@ class DsmClient:
|
|||||||
params=params,
|
params=params,
|
||||||
timeout=httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=5.0),
|
timeout=httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=5.0),
|
||||||
) as resp:
|
) as resp:
|
||||||
|
try:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
# Body intentionally not read. Close context → connection closes.
|
except httpx.HTTPStatusError as e:
|
||||||
|
url_safe = _scrub_url(str(e.request.url))
|
||||||
|
raise SynologyError(
|
||||||
|
f"HTTP {resp.status_code} from {url_safe}",
|
||||||
|
code=resp.status_code,
|
||||||
|
) from None
|
||||||
|
|
||||||
|
content_type = resp.headers.get("content-type", "")
|
||||||
|
if "application/json" in content_type:
|
||||||
|
# DSM rejected the build — read the JSON error body (capped
|
||||||
|
# at ~4 KB; DSM error envelopes are tiny).
|
||||||
|
body = b""
|
||||||
|
async for chunk in resp.aiter_bytes():
|
||||||
|
body += chunk
|
||||||
|
if len(body) >= 4096:
|
||||||
|
break
|
||||||
|
try:
|
||||||
|
parsed = json.loads(body.decode("utf-8", errors="replace"))
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
# Malformed response — treat as accepted and let the
|
||||||
|
# caller's polling surface any real failure.
|
||||||
|
return
|
||||||
|
if not parsed.get("success", True):
|
||||||
|
code = parsed.get("error", {}).get("code", 0)
|
||||||
|
raise SynologyError(_error_message(code, api), code=code)
|
||||||
|
# success=true with JSON content-type: odd, treat as accepted.
|
||||||
|
return
|
||||||
|
# SSE or anything else → fire-and-forget, close without reading.
|
||||||
except httpx.ReadTimeout:
|
except httpx.ReadTimeout:
|
||||||
# Headers not received within 10 s, but the GET request was already
|
# Headers not received within 10 s, but the GET request was already
|
||||||
# sent. DSM received it and started the build. Proceed to polling.
|
# sent. DSM received it and started the build. Proceed to polling.
|
||||||
@@ -480,7 +551,14 @@ class DsmClient:
|
|||||||
files={"file": (filename, encoded, "text/plain")},
|
files={"file": (filename, encoded, "text/plain")},
|
||||||
timeout=httpx.Timeout(60.0),
|
timeout=httpx.Timeout(60.0),
|
||||||
)
|
)
|
||||||
|
try:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
url_safe = _scrub_url(str(e.request.url))
|
||||||
|
raise SynologyError(
|
||||||
|
f"HTTP {resp.status_code} from {url_safe}",
|
||||||
|
code=resp.status_code,
|
||||||
|
) from None
|
||||||
body = resp.json()
|
body = resp.json()
|
||||||
|
|
||||||
if body.get("success"):
|
if body.get("success"):
|
||||||
@@ -523,7 +601,14 @@ class DsmClient:
|
|||||||
logger.debug("DSM GET: %s/download v%d — %s", api, resolved_version, log_params)
|
logger.debug("DSM GET: %s/download v%d — %s", api, resolved_version, log_params)
|
||||||
|
|
||||||
resp = await http.get(url, params=params, timeout=httpx.Timeout(60.0))
|
resp = await http.get(url, params=params, timeout=httpx.Timeout(60.0))
|
||||||
|
try:
|
||||||
resp.raise_for_status()
|
resp.raise_for_status()
|
||||||
|
except httpx.HTTPStatusError as e:
|
||||||
|
url_safe = _scrub_url(str(e.request.url))
|
||||||
|
raise SynologyError(
|
||||||
|
f"HTTP {resp.status_code} from {url_safe}",
|
||||||
|
code=resp.status_code,
|
||||||
|
) from None
|
||||||
|
|
||||||
content_type = resp.headers.get("content-type", "")
|
content_type = resp.headers.get("content-type", "")
|
||||||
if "application/json" in content_type:
|
if "application/json" in content_type:
|
||||||
|
|||||||
Reference in New Issue
Block a user