From 21fd8e168c6b38534ac8b7657a8b44e1900c0849 Mon Sep 17 00:00:00 2001 From: Marcus van Elst Date: Tue, 21 Apr 2026 09:37:02 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20dsm=5Fclient=20=E2=80=94=20scrub=20sessi?= =?UTF-8?q?on=20IDs,=20fix=20re-auth=20race,=20detect=20build=5Fstream=20J?= =?UTF-8?q?SON=20errors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Scrub `_sid` from URL in HTTPStatusError messages to prevent session-ID leaks into MCP tool responses (C1). - Re-auth lock now double-checks the SID snapshot to avoid duplicate logins on concurrent 106/107/119 responses (M3). - `trigger_build_stream` again detects immediate JSON error responses from DSM (regression from 0.2.6); SSE streams remain fire-and-forget (C2/M8). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/mcp_synology_container/dsm_client.py | 137 ++++++++++++++++++----- 1 file changed, 111 insertions(+), 26 deletions(-) diff --git a/src/mcp_synology_container/dsm_client.py b/src/mcp_synology_container/dsm_client.py index dee055f..2c77f62 100644 --- a/src/mcp_synology_container/dsm_client.py +++ b/src/mcp_synology_container/dsm_client.py @@ -11,7 +11,9 @@ Thin async client wrapping Synology DSM Web API conventions: from __future__ import annotations import asyncio +import json import logging +import re import sys from typing import TYPE_CHECKING, Any @@ -28,6 +30,19 @@ _SESSION_ERROR_CODES = frozenset({106, 107, 119}) # Parameters to mask in debug logging _SENSITIVE_PARAMS = frozenset({"passwd", "_sid", "device_id", "otp_code", "device_token"}) +# Regex to strip the session-ID value from URL query strings before surfacing +# them in error messages (HTTPStatusError embeds the full request URL). +_SID_QUERY_RE = re.compile(r"(_sid=)[^&\s]*") + + +def _scrub_url(url: str) -> str: + """Replace the value of any `_sid=...` query param with `***`. + + Used to sanitize URLs embedded in `httpx.HTTPStatusError` messages so the + raw DSM session ID never reaches log output or MCP tool responses. + """ + return _SID_QUERY_RE.sub(r"\1***", url) + class SynologyError(Exception): """Raised when DSM API returns a non-success response.""" @@ -185,7 +200,14 @@ class DsmClient: logger.debug("Querying API info from %s", url) resp = await http.get(url, params=params) - resp.raise_for_status() + try: + resp.raise_for_status() + except httpx.HTTPStatusError as e: + url_safe = _scrub_url(str(e.request.url)) + raise SynologyError( + f"HTTP {resp.status_code} from {url_safe}", + code=resp.status_code, + ) from None body = resp.json() if not body.get("success"): @@ -268,7 +290,14 @@ class DsmClient: ) resp = await http.get(url, params=req_params) - resp.raise_for_status() + try: + resp.raise_for_status() + except httpx.HTTPStatusError as e: + url_safe = _scrub_url(str(e.request.url)) + raise SynologyError( + f"HTTP {resp.status_code} from {url_safe}", + code=resp.status_code, + ) from None body = resp.json() if body.get("success"): @@ -281,13 +310,17 @@ class DsmClient: # Transparent re-auth on session errors (one retry only) if code in _SESSION_ERROR_CODES and not _is_retry and self._auth_manager: + old_sid = self._sid logger.info("Session error %d on %s/%s, re-authenticating...", code, api, method) async with self._reauth_lock: - self._sid = None - try: - self._sid = await self._auth_manager.login(self) - except Exception as e: - raise SynologyError(f"Re-authentication failed: {e}", code=code) from e + # Double-check: if another task already refreshed the SID while + # we were waiting on the lock, skip the redundant login. + if self._sid == old_sid: + self._sid = None + try: + self._sid = await self._auth_manager.login(self) + except Exception as e: + raise SynologyError(f"Re-authentication failed: {e}", code=code) from e return await self.request(api, method, version, params, _is_retry=True) raise SynologyError(_error_message(code, api), code=code) @@ -348,7 +381,14 @@ class DsmClient: logger.debug("DSM POST: %s/%s v%d — %s", api, method, resolved_version, log_form) resp = await http.post(url, params=query_params, data=form) - resp.raise_for_status() + try: + resp.raise_for_status() + except httpx.HTTPStatusError as e: + url_safe = _scrub_url(str(e.request.url)) + raise SynologyError( + f"HTTP {resp.status_code} from {url_safe}", + code=resp.status_code, + ) from None body = resp.json() if body.get("success"): @@ -363,23 +403,25 @@ class DsmClient: This is the proper way to force an image pull and project restart in DSM Container Manager (confirmed via browser DevTools). The endpoint is a - Server-Sent Events (SSE) stream; we send the request and close immediately - without reading any of the response body. DSM starts the build upon - receiving the request and continues server-side regardless of whether the - HTTP connection stays open. Callers should poll SYNO.Docker.Project/list - for the resulting RUNNING status. + Server-Sent Events (SSE) stream on success; we send the request, check + the response headers, and close without consuming the SSE body. DSM + starts the build upon receiving the request and continues server-side + regardless of whether the HTTP connection stays open. Callers should + poll SYNO.Docker.Project/list for the resulting RUNNING status. - Fire-and-forget: we only wait long enough to receive the HTTP response - headers (to detect immediate HTTP-level errors), then close the connection. - We never read SSE events, so this returns in < 10 s regardless of how long - the image pull takes. Claude Desktop's ~4-minute tool-call timeout is - therefore not a concern. + Error detection: DSM signals application-level rejection (e.g. project + locked, invalid id) as an HTTP-200 JSON body `{"success": false, ...}` + rather than as an SSE stream. We inspect the `Content-Type` header and, + when it is `application/json`, read a small capped prefix of the body + to surface the DSM error code immediately instead of forcing the caller + into a multi-minute polling timeout. SSE responses are not read. Args: project_id: Project UUID from SYNO.Docker.Project/list. Raises: - httpx.HTTPStatusError: If the HTTP response status indicates an error. + SynologyError: If DSM rejects the build with a JSON error body, or + if the HTTP response status indicates a transport-level error. """ await self._ensure_initialized() http = self._get_http() @@ -403,9 +445,10 @@ class DsmClient: sys.stderr.flush() logger.debug("build_stream: project_id=%s", project_id) - # Fire-and-forget: open the stream, check HTTP status, close immediately. - # The read timeout only applies to waiting for response *headers*; we never - # read the SSE body, so DSM's streaming cannot block this call indefinitely. + # Fire-and-forget for the SSE body, but detect immediate JSON errors. + # The read timeout only applies to waiting for response *headers* and + # for the (small, capped) JSON error body we read; we never consume SSE + # events, so DSM's streaming cannot block this call indefinitely. try: async with http.stream( "GET", @@ -413,8 +456,36 @@ class DsmClient: params=params, timeout=httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=5.0), ) as resp: - resp.raise_for_status() - # Body intentionally not read. Close context → connection closes. + try: + resp.raise_for_status() + except httpx.HTTPStatusError as e: + url_safe = _scrub_url(str(e.request.url)) + raise SynologyError( + f"HTTP {resp.status_code} from {url_safe}", + code=resp.status_code, + ) from None + + content_type = resp.headers.get("content-type", "") + if "application/json" in content_type: + # DSM rejected the build — read the JSON error body (capped + # at ~4 KB; DSM error envelopes are tiny). + body = b"" + async for chunk in resp.aiter_bytes(): + body += chunk + if len(body) >= 4096: + break + try: + parsed = json.loads(body.decode("utf-8", errors="replace")) + except json.JSONDecodeError: + # Malformed response — treat as accepted and let the + # caller's polling surface any real failure. + return + if not parsed.get("success", True): + code = parsed.get("error", {}).get("code", 0) + raise SynologyError(_error_message(code, api), code=code) + # success=true with JSON content-type: odd, treat as accepted. + return + # SSE or anything else → fire-and-forget, close without reading. except httpx.ReadTimeout: # Headers not received within 10 s, but the GET request was already # sent. DSM received it and started the build. Proceed to polling. @@ -480,7 +551,14 @@ class DsmClient: files={"file": (filename, encoded, "text/plain")}, timeout=httpx.Timeout(60.0), ) - resp.raise_for_status() + try: + resp.raise_for_status() + except httpx.HTTPStatusError as e: + url_safe = _scrub_url(str(e.request.url)) + raise SynologyError( + f"HTTP {resp.status_code} from {url_safe}", + code=resp.status_code, + ) from None body = resp.json() if body.get("success"): @@ -523,7 +601,14 @@ class DsmClient: logger.debug("DSM GET: %s/download v%d — %s", api, resolved_version, log_params) resp = await http.get(url, params=params, timeout=httpx.Timeout(60.0)) - resp.raise_for_status() + try: + resp.raise_for_status() + except httpx.HTTPStatusError as e: + url_safe = _scrub_url(str(e.request.url)) + raise SynologyError( + f"HTTP {resp.status_code} from {url_safe}", + code=resp.status_code, + ) from None content_type = resp.headers.get("content-type", "") if "application/json" in content_type: