feat: v0.3.0 — review welle 2 (M-4, M-5, M-6)
Three resilience and honesty fixes from the v0.2.8 review. Minor version bump because redeploy_project and system_prune return different strings.

M-4: trigger_build_stream now converts every non-ReadTimeout httpx.HTTPError (ConnectError, ConnectTimeout, WriteError, RemoteProtocolError, ...) into a SynologyError with a clear message. Previously only ReadTimeout was handled; everything else propagated as a raw httpx exception. redeploy_project now tracks whether stop was actually issued and, when build_stream fails after a successful stop, tells the user the project is in STOPPED state and recommends start_project / retry rather than the misleading "use stop + start separately" workaround.

M-5: _wait_for_project_running exits early on BUILD_FAILED / ERROR (new _TERMINAL_FAILURE_STATUSES frozenset). DSM signals these statuses within seconds of a failed image pull; the old polling loop kept waiting up to 5 minutes for RUNNING. redeploy_project now surfaces the terminal status with a BUILD_FAILED-specific hint to update_image_tag.

M-6: system_prune preview now enumerates user-created networks that have no containers attached (excluding the three built-in networks bridge/host/none, which Docker never prunes). Previously the preview noted "Unused networks: (not counted)" even though SYNO.Docker.Utils/prune does delete them — users could lose networks they had not been warned about.

Tests:
- 2 new dsm_client tests: ConnectError and RemoteProtocolError both raise SynologyError, not raw httpx exceptions.
- 2 new project tests: recovery hint after stop+build_stream failure (RUNNING case); old workaround retained for the STOPPED case where no stop was issued.
- 3 new polling tests: BUILD_FAILED and ERROR each trigger early exit; redeploy_project surfaces BUILD_FAILED with update_image_tag hint.
- 2 new system_prune preview tests: counts unused networks correctly, excludes built-ins; network-fetch failure is non-fatal.

245 tests pass. ruff check + ruff format clean.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -19,6 +19,12 @@ _POLL_INTERVAL = 2 # seconds between status checks
|
||||
_POLL_TIMEOUT = 30 # seconds for ordinary start polling
|
||||
_BUILD_POLL_TIMEOUT = 300 # seconds for build_stream polling (image pull can be slow)
|
||||
|
||||
# Statuses that mean "stop polling now — this redeploy is not coming back."
|
||||
# DSM signals these typically within seconds of build_stream when the image
|
||||
# pull or container start fails; without an early exit the caller would wait
|
||||
# the full _BUILD_POLL_TIMEOUT for nothing.
|
||||
_TERMINAL_FAILURE_STATUSES = frozenset({"BUILD_FAILED", "ERROR"})
|
||||
|
||||
|
||||
def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> None:
|
||||
"""Register all project management tools with the MCP server."""
|
||||
@@ -124,6 +130,10 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non
|
||||
)
|
||||
|
||||
results: list[str] = []
|
||||
# Track whether we issued a stop that DSM accepted. Used to give the
|
||||
# caller an accurate recovery hint if a later step (build_stream)
|
||||
# fails — the project would be left in STOPPED state.
|
||||
stop_was_issued = False
|
||||
|
||||
try:
|
||||
# ── Step 1: Stop ──────────────────────────────────────────────────
|
||||
@@ -133,10 +143,12 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non
|
||||
results.append("Step 1/3: Stopping failed build...")
|
||||
with contextlib.suppress(Exception):
|
||||
await client.request("SYNO.Docker.Project", "stop", params={"id": project_id})
|
||||
stop_was_issued = True
|
||||
results.append(" Stopped.")
|
||||
else: # RUNNING
|
||||
results.append("Step 1/3: Stopping project...")
|
||||
await client.request("SYNO.Docker.Project", "stop", params={"id": project_id})
|
||||
stop_was_issued = True
|
||||
results.append(" Stopped.")
|
||||
|
||||
# ── Step 2: build_stream (pull images + start) ────────────────────
|
||||
@@ -155,6 +167,19 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non
|
||||
if final_status == "RUNNING":
|
||||
results.append(" Project is RUNNING.")
|
||||
results.append(f"\nProject '{project_name}' redeployed successfully.")
|
||||
elif final_status in _TERMINAL_FAILURE_STATUSES:
|
||||
# M-5: DSM signalled a hard failure during polling (e.g.
|
||||
# image pull failed). Surface it immediately rather than
|
||||
# waiting for the full timeout.
|
||||
results.append(f" Redeploy failed — project status is '{final_status}'.")
|
||||
if final_status == "BUILD_FAILED":
|
||||
results.append(
|
||||
" Check the image tag in the compose file "
|
||||
"(update_image_tag) and retry redeploy_project."
|
||||
)
|
||||
results.append(
|
||||
f"\nProject '{project_name}' redeploy aborted (status: {final_status})."
|
||||
)
|
||||
else:
|
||||
results.append(
|
||||
f" Warning: project status is '{final_status}' after "
|
||||
@@ -165,7 +190,18 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non
|
||||
|
||||
except Exception as e:
|
||||
results.append(f"Error during redeploy: {e}")
|
||||
results.append("Workaround: use stop_project + start_project separately.")
|
||||
if stop_was_issued:
|
||||
# M-4: build_stream (or polling) failed AFTER we stopped the
|
||||
# project. The project is now in STOPPED state and the caller
|
||||
# needs to know that — the previous "use stop + start"
|
||||
# workaround was misleading because stop already happened.
|
||||
results.append(
|
||||
f"Note: project '{project_name}' was stopped before this error and is "
|
||||
f"now in STOPPED state. Run start_project('{project_name}') or retry "
|
||||
f"redeploy_project to recover."
|
||||
)
|
||||
else:
|
||||
results.append("Workaround: use stop_project + start_project separately.")
|
||||
|
||||
return "\n".join(results)
|
||||
|
||||
@@ -220,6 +256,12 @@ async def _wait_for_project_running(
|
||||
logger.debug("Polling '%s': status=%s elapsed=%ds", name, current, elapsed)
|
||||
if current == "RUNNING":
|
||||
return current
|
||||
if current in _TERMINAL_FAILURE_STATUSES:
|
||||
# DSM has reported a hard failure (e.g. image pull failed,
|
||||
# container exited immediately). Returning early lets the
|
||||
# caller surface the real cause instead of waiting out the
|
||||
# full timeout.
|
||||
return current
|
||||
# Return whatever status we last saw (or UNKNOWN on repeated failures)
|
||||
project = await _find_project(client, name)
|
||||
return (project.get("status") or "UNKNOWN").upper() if project else "UNKNOWN"
|
||||
|
||||
Reference in New Issue
Block a user