feat: v0.3.0 — review wave 2 (M-4, M-5, M-6)
Three resilience and honesty fixes from the v0.2.8 review. Minor version bump because redeploy_project and system_prune return different strings.

M-4: trigger_build_stream now converts every non-ReadTimeout httpx.HTTPError (ConnectError, ConnectTimeout, WriteError, RemoteProtocolError, ...) into a SynologyError with a clear message. Previously only ReadTimeout was handled; everything else propagated as a raw httpx exception. redeploy_project now tracks whether stop was actually issued and, when build_stream fails after a successful stop, tells the user the project is in STOPPED state and recommends start_project / retry rather than the misleading "use stop + start separately" workaround.

M-5: _wait_for_project_running exits early on BUILD_FAILED / ERROR (new _TERMINAL_FAILURE_STATUSES frozenset). DSM signals these statuses within seconds of a failed image pull; the old polling loop kept waiting up to 5 minutes for RUNNING. redeploy_project now surfaces the terminal status with a BUILD_FAILED-specific hint to update_image_tag.

M-6: system_prune preview now enumerates user-created networks that have no containers attached (excluding the three built-in networks bridge/host/none, which Docker never prunes). Previously the preview noted "Unused networks: (not counted)" even though SYNO.Docker.Utils/prune does delete them — users could lose networks they had not been warned about.

Tests:
- 2 new dsm_client tests: ConnectError and RemoteProtocolError both raise SynologyError, not raw httpx exceptions.
- 2 new project tests: recovery hint after stop+build_stream failure (RUNNING case); old workaround retained for the STOPPED case where no stop was issued.
- 3 new polling tests: BUILD_FAILED and ERROR each trigger early exit; redeploy_project surfaces BUILD_FAILED with update_image_tag hint.
- 2 new system_prune preview tests: counts unused networks correctly, excludes built-ins; network-fetch failure is non-fatal.

245 tests pass. ruff check + ruff format clean.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -511,6 +511,17 @@ class DsmClient:
|
||||
# Headers not received within 10 s, but the GET request was already
|
||||
# sent. DSM received it and started the build. Proceed to polling.
|
||||
pass
|
||||
except httpx.HTTPError as e:
|
||||
# Other transport-level failures (ConnectError, ConnectTimeout,
|
||||
# WriteError, RemoteProtocolError, …) mean DSM never received the
|
||||
# build request. Surface a clear SynologyError instead of letting
|
||||
# the raw httpx exception bubble up — the caller (redeploy_project)
|
||||
# has typically already stopped the project and needs to know that
|
||||
# the build did not start.
|
||||
raise SynologyError(
|
||||
f"build_stream transport error: {type(e).__name__}: {e}",
|
||||
code=0,
|
||||
) from None
|
||||
|
||||
async def upload_text(
|
||||
self,
|
||||
|
||||
@@ -19,6 +19,12 @@ _POLL_INTERVAL = 2 # seconds between status checks
|
||||
_POLL_TIMEOUT = 30 # seconds for ordinary start polling
|
||||
_BUILD_POLL_TIMEOUT = 300 # seconds for build_stream polling (image pull can be slow)
|
||||
|
||||
# Statuses that mean "stop polling now — this redeploy is not coming back."
|
||||
# DSM typically signals these within seconds of build_stream when the image
|
||||
# pull or container start fails; without an early exit the caller would wait
|
||||
# the full _BUILD_POLL_TIMEOUT for nothing.
|
||||
_TERMINAL_FAILURE_STATUSES = frozenset({"BUILD_FAILED", "ERROR"})
|
||||
|
||||
|
||||
def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> None:
|
||||
"""Register all project management tools with the MCP server."""
|
||||
@@ -124,6 +130,10 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non
|
||||
)
|
||||
|
||||
results: list[str] = []
|
||||
# Track whether we issued a stop that DSM accepted. Used to give the
|
||||
# caller an accurate recovery hint if a later step (build_stream)
|
||||
# fails — the project would be left in STOPPED state.
|
||||
stop_was_issued = False
|
||||
|
||||
try:
|
||||
# ── Step 1: Stop ──────────────────────────────────────────────────
|
||||
@@ -133,10 +143,12 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non
|
||||
results.append("Step 1/3: Stopping failed build...")
|
||||
with contextlib.suppress(Exception):
|
||||
await client.request("SYNO.Docker.Project", "stop", params={"id": project_id})
|
||||
stop_was_issued = True
|
||||
results.append(" Stopped.")
|
||||
else: # RUNNING
|
||||
results.append("Step 1/3: Stopping project...")
|
||||
await client.request("SYNO.Docker.Project", "stop", params={"id": project_id})
|
||||
stop_was_issued = True
|
||||
results.append(" Stopped.")
|
||||
|
||||
# ── Step 2: build_stream (pull images + start) ────────────────────
|
||||
@@ -155,6 +167,19 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non
|
||||
if final_status == "RUNNING":
|
||||
results.append(" Project is RUNNING.")
|
||||
results.append(f"\nProject '{project_name}' redeployed successfully.")
|
||||
elif final_status in _TERMINAL_FAILURE_STATUSES:
|
||||
# M-5: DSM signalled a hard failure during polling (e.g.
|
||||
# image pull failed). Surface it immediately rather than
|
||||
# waiting for the full timeout.
|
||||
results.append(f" Redeploy failed — project status is '{final_status}'.")
|
||||
if final_status == "BUILD_FAILED":
|
||||
results.append(
|
||||
" Check the image tag in the compose file "
|
||||
"(update_image_tag) and retry redeploy_project."
|
||||
)
|
||||
results.append(
|
||||
f"\nProject '{project_name}' redeploy aborted (status: {final_status})."
|
||||
)
|
||||
else:
|
||||
results.append(
|
||||
f" Warning: project status is '{final_status}' after "
|
||||
@@ -165,7 +190,18 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non
|
||||
|
||||
except Exception as e:
|
||||
results.append(f"Error during redeploy: {e}")
|
||||
results.append("Workaround: use stop_project + start_project separately.")
|
||||
if stop_was_issued:
|
||||
# M-4: build_stream (or polling) failed AFTER we stopped the
|
||||
# project. The project is now in STOPPED state and the caller
|
||||
# needs to know that — the previous "use stop + start"
|
||||
# workaround was misleading because stop already happened.
|
||||
results.append(
|
||||
f"Note: project '{project_name}' was stopped before this error and is "
|
||||
f"now in STOPPED state. Run start_project('{project_name}') or retry "
|
||||
f"redeploy_project to recover."
|
||||
)
|
||||
else:
|
||||
results.append("Workaround: use stop_project + start_project separately.")
|
||||
|
||||
return "\n".join(results)
|
||||
|
||||
@@ -220,6 +256,12 @@ async def _wait_for_project_running(
|
||||
logger.debug("Polling '%s': status=%s elapsed=%ds", name, current, elapsed)
|
||||
if current == "RUNNING":
|
||||
return current
|
||||
if current in _TERMINAL_FAILURE_STATUSES:
|
||||
# DSM has reported a hard failure (e.g. image pull failed,
|
||||
# container exited immediately). Returning early lets the
|
||||
# caller surface the real cause instead of waiting out the
|
||||
# full timeout.
|
||||
return current
|
||||
# Return whatever status we last saw (or UNKNOWN on repeated failures)
|
||||
project = await _find_project(client, name)
|
||||
return (project.get("status") or "UNKNOWN").upper() if project else "UNKNOWN"
|
||||
|
||||
@@ -15,6 +15,11 @@ if TYPE_CHECKING:
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Built-in Docker networks are never removed by `docker network prune`
|
||||
# regardless of attached-container count. Skip them when counting what
|
||||
# the prune will actually delete.
|
||||
_BUILTIN_NETWORKS = frozenset({"bridge", "host", "none"})
|
||||
|
||||
|
||||
def register_system(mcp: FastMCP, config: AppConfig, client: DsmClient) -> None:
|
||||
"""Register all system-level tools with the MCP server."""
|
||||
@@ -132,6 +137,21 @@ def register_system(mcp: FastMCP, config: AppConfig, client: DsmClient) -> None:
|
||||
dangling_size = sum(img.get("size", 0) for img in dangling_images)
|
||||
|
||||
if not confirmed:
|
||||
# M-6: also enumerate networks that would be removed so the
|
||||
# preview matches the actual prune scope. Networks are only
|
||||
# fetched in preview mode — the prune call itself doesn't
|
||||
# need them.
|
||||
unused_networks: list[dict[str, Any]] = []
|
||||
try:
|
||||
net_data = await client.request("SYNO.Docker.Network", "list")
|
||||
for net in net_data.get("network", []) or []:
|
||||
name = net.get("name", "")
|
||||
attached = net.get("containers") or []
|
||||
if not attached and name not in _BUILTIN_NETWORKS:
|
||||
unused_networks.append(net)
|
||||
except Exception as e:
|
||||
logger.debug("Could not fetch networks for prune preview: %s", e)
|
||||
|
||||
lines = ["system_prune — preview (nothing deleted yet):", ""]
|
||||
lines.append(
|
||||
f" Dangling/unused images: {len(dangling_images)} ({_human_size(dangling_size)})"
|
||||
@@ -149,7 +169,12 @@ def register_system(mcp: FastMCP, config: AppConfig, client: DsmClient) -> None:
|
||||
if len(stopped_containers) > 10:
|
||||
lines.append(f" … and {len(stopped_containers) - 10} more")
|
||||
|
||||
lines.append(" Unused networks: (not counted — run prune to remove)")
|
||||
lines.append(f" Unused networks: {len(unused_networks)}")
|
||||
for net in unused_networks[:10]:
|
||||
driver = net.get("driver", "?")
|
||||
lines.append(f" - {net.get('name', '?')} ({driver})")
|
||||
if len(unused_networks) > 10:
|
||||
lines.append(f" … and {len(unused_networks) - 10} more")
|
||||
lines.append("")
|
||||
lines.append(
|
||||
f"Call system_prune(confirmed=True) to free ~{_human_size(dangling_size)}."
|
||||
|
||||
Reference in New Issue
Block a user