From 13e10fa52f7a511a2695e6c37d9714422372121a Mon Sep 17 00:00:00 2001 From: Marcus van Elst Date: Mon, 18 May 2026 09:57:20 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20v0.3.0=20=E2=80=94=20review=20welle=202?= =?UTF-8?q?=20(M-4,=20M-5,=20M-6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three resilience and honesty fixes from the v0.2.8 review. Minor version bump because redeploy_project and system_prune return different strings. M-4: trigger_build_stream now converts every non-ReadTimeout httpx.HTTPError (ConnectError, ConnectTimeout, WriteError, RemoteProtocolError, ...) into a SynologyError with a clear message. Previously only ReadTimeout was handled; everything else propagated as a raw httpx exception. redeploy_project now tracks whether stop was actually issued and, when build_stream fails after a successful stop, tells the user the project is in STOPPED state and recommends start_project / retry rather than the misleading "use stop + start separately" workaround. M-5: _wait_for_project_running exits early on BUILD_FAILED / ERROR (new _TERMINAL_FAILURE_STATUSES frozenset). DSM signals these statuses within seconds of a failed image pull; the old polling loop kept waiting up to 5 minutes for RUNNING. redeploy_project now surfaces the terminal status with a BUILD_FAILED-specific hint to update_image_tag. M-6: system_prune preview now enumerates user-created networks that have no containers attached (excluding the three built-in networks bridge/host/none, which Docker never prunes). Previously the preview noted "Unused networks: (not counted)" even though SYNO.Docker.Utils/prune does delete them — users could lose networks they had not been warned about. Tests: - 2 new dsm_client tests: ConnectError and RemoteProtocolError both raise SynologyError, not raw httpx exceptions. - 2 new project tests: recovery hint after stop+build_stream failure (RUNNING case); old workaround retained for the STOPPED case where no stop was issued. - 3 new polling tests: BUILD_FAILED and ERROR each trigger early exit; redeploy_project surfaces BUILD_FAILED with update_image_tag hint. - 2 new system_prune preview tests: counts unused networks correctly, excludes built-ins; network-fetch failure is non-fatal. 245 tests pass. ruff check + ruff format clean. Co-Authored-By: Claude Opus 4.7 --- CHANGELOG.md | 36 +++++ pyproject.toml | 2 +- src/mcp_synology_container/dsm_client.py | 11 ++ .../modules/projects.py | 44 +++++- src/mcp_synology_container/modules/system.py | 27 +++- tests/test_dsm_client.py | 40 ++++++ tests/test_modules/test_projects.py | 136 ++++++++++++++++++ tests/test_modules/test_system.py | 89 ++++++++++++ uv.lock | 2 +- 9 files changed, 383 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 351d414..35e0f39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,42 @@ All notable changes to this project will be documented in this file. +## [0.3.0] - 2026-05-18 + +### Fixed + +- `DsmClient.trigger_build_stream`: broaden transport-error handling + (M-4). `httpx.ReadTimeout` is still treated as a success signal (the + request was sent; DSM is processing it server-side), but all other + `httpx.HTTPError` subclasses (`ConnectError`, `ConnectTimeout`, + `WriteError`, `RemoteProtocolError`, …) are now converted into a + `SynologyError` with a clear message. Previously these propagated as + raw httpx exceptions and left callers with a stack trace. +- `redeploy_project`: when build_stream (or the polling step) fails + after the project has already been stopped, the response now + explicitly tells the user that the project is in `STOPPED` state and + must be recovered with `start_project` or another `redeploy_project` + call. Previously the response suggested "use stop + start + separately", which was misleading because stop had already happened. +- `_wait_for_project_running`: exit the polling loop early on + `BUILD_FAILED` / `ERROR` (M-5). DSM signals these statuses within + seconds of a failed image pull; the old polling loop kept waiting up + to 5 minutes for `RUNNING`. `redeploy_project` surfaces the terminal + status with a hint to `update_image_tag` and retry when the cause is + `BUILD_FAILED`. +- `system_prune` preview: count unused user-created networks alongside + dangling images and stopped containers (M-6). Built-in networks + (`bridge`, `host`, `none`) are excluded because Docker never prunes + them. Previously the preview noted "Unused networks: (not counted)", + even though the underlying `SYNO.Docker.Utils/prune` call deletes + them — users could lose networks they had not been warned about. + +### Changed + +- Minor version bump because `redeploy_project` and `system_prune` + return different strings (and `redeploy_project` returns earlier on + `BUILD_FAILED`). No tool signatures changed. + ## [0.2.9] - 2026-05-18 ### Fixed diff --git a/pyproject.toml b/pyproject.toml index 9f8bb76..a924c1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "mcp-synology-container" -version = "0.2.9" +version = "0.3.0" description = "MCP server for Synology Container Manager" requires-python = ">=3.12" dependencies = [ diff --git a/src/mcp_synology_container/dsm_client.py b/src/mcp_synology_container/dsm_client.py index 7855dae..9f8aab9 100644 --- a/src/mcp_synology_container/dsm_client.py +++ b/src/mcp_synology_container/dsm_client.py @@ -511,6 +511,17 @@ class DsmClient: # Headers not received within 10 s, but the GET request was already # sent. DSM received it and started the build. Proceed to polling. pass + except httpx.HTTPError as e: + # Other transport-level failures (ConnectError, ConnectTimeout, + # WriteError, RemoteProtocolError, …) mean DSM never received the + # build request. Surface a clear SynologyError instead of letting + # the raw httpx exception bubble up — the caller (redeploy_project) + # has typically already stopped the project and needs to know that + # the build did not start. + raise SynologyError( + f"build_stream transport error: {type(e).__name__}: {e}", + code=0, + ) from None async def upload_text( self, diff --git a/src/mcp_synology_container/modules/projects.py b/src/mcp_synology_container/modules/projects.py index e9e3fb2..0499941 100644 --- a/src/mcp_synology_container/modules/projects.py +++ b/src/mcp_synology_container/modules/projects.py @@ -19,6 +19,12 @@ _POLL_INTERVAL = 2 # seconds between status checks _POLL_TIMEOUT = 30 # seconds for ordinary start polling _BUILD_POLL_TIMEOUT = 300 # seconds for build_stream polling (image pull can be slow) +# Statuses that mean "stop polling now — this redeploy is not coming back." +# DSM signals these typically within seconds of build_stream when the image +# pull or container start fails; without an early exit the caller would wait +# the full _BUILD_POLL_TIMEOUT for nothing. +_TERMINAL_FAILURE_STATUSES = frozenset({"BUILD_FAILED", "ERROR"}) + def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> None: """Register all project management tools with the MCP server.""" @@ -124,6 +130,10 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non ) results: list[str] = [] + # Track whether we issued a stop that DSM accepted. Used to give the + # caller an accurate recovery hint if a later step (build_stream) + # fails — the project would be left in STOPPED state. + stop_was_issued = False try: # ── Step 1: Stop ────────────────────────────────────────────────── @@ -133,10 +143,12 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non results.append("Step 1/3: Stopping failed build...") with contextlib.suppress(Exception): await client.request("SYNO.Docker.Project", "stop", params={"id": project_id}) + stop_was_issued = True results.append(" Stopped.") else: # RUNNING results.append("Step 1/3: Stopping project...") await client.request("SYNO.Docker.Project", "stop", params={"id": project_id}) + stop_was_issued = True results.append(" Stopped.") # ── Step 2: build_stream (pull images + start) ──────────────────── @@ -155,6 +167,19 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non if final_status == "RUNNING": results.append(" Project is RUNNING.") results.append(f"\nProject '{project_name}' redeployed successfully.") + elif final_status in _TERMINAL_FAILURE_STATUSES: + # M-5: DSM signalled a hard failure during polling (e.g. + # image pull failed). Surface it immediately rather than + # waiting for the full timeout. + results.append(f" Redeploy failed — project status is '{final_status}'.") + if final_status == "BUILD_FAILED": + results.append( + " Check the image tag in the compose file " + "(update_image_tag) and retry redeploy_project." + ) + results.append( + f"\nProject '{project_name}' redeploy aborted (status: {final_status})." + ) else: results.append( f" Warning: project status is '{final_status}' after " @@ -165,7 +190,18 @@ def register_projects(mcp: FastMCP, config: AppConfig, client: DsmClient) -> Non except Exception as e: results.append(f"Error during redeploy: {e}") - results.append("Workaround: use stop_project + start_project separately.") + if stop_was_issued: + # M-4: build_stream (or polling) failed AFTER we stopped the + # project. The project is now in STOPPED state and the caller + # needs to know that — the previous "use stop + start" + # workaround was misleading because stop already happened. + results.append( + f"Note: project '{project_name}' was stopped before this error and is " + f"now in STOPPED state. Run start_project('{project_name}') or retry " + f"redeploy_project to recover." + ) + else: + results.append("Workaround: use stop_project + start_project separately.") return "\n".join(results) @@ -220,6 +256,12 @@ async def _wait_for_project_running( logger.debug("Polling '%s': status=%s elapsed=%ds", name, current, elapsed) if current == "RUNNING": return current + if current in _TERMINAL_FAILURE_STATUSES: + # DSM has reported a hard failure (e.g. image pull failed, + # container exited immediately). Returning early lets the + # caller surface the real cause instead of waiting out the + # full timeout. + return current # Return whatever status we last saw (or UNKNOWN on repeated failures) project = await _find_project(client, name) return (project.get("status") or "UNKNOWN").upper() if project else "UNKNOWN" diff --git a/src/mcp_synology_container/modules/system.py b/src/mcp_synology_container/modules/system.py index 1a745c5..e4ed105 100644 --- a/src/mcp_synology_container/modules/system.py +++ b/src/mcp_synology_container/modules/system.py @@ -15,6 +15,11 @@ if TYPE_CHECKING: logger = logging.getLogger(__name__) +# Built-in Docker networks are never removed by `docker network prune` +# regardless of attached-container count. Skip them when counting what +# the prune will actually delete. +_BUILTIN_NETWORKS = frozenset({"bridge", "host", "none"}) + def register_system(mcp: FastMCP, config: AppConfig, client: DsmClient) -> None: """Register all system-level tools with the MCP server.""" @@ -132,6 +137,21 @@ def register_system(mcp: FastMCP, config: AppConfig, client: DsmClient) -> None: dangling_size = sum(img.get("size", 0) for img in dangling_images) if not confirmed: + # M-6: also enumerate networks that would be removed so the + # preview matches the actual prune scope. Networks are only + # fetched in preview mode — the prune call itself doesn't + # need them. + unused_networks: list[dict[str, Any]] = [] + try: + net_data = await client.request("SYNO.Docker.Network", "list") + for net in net_data.get("network", []) or []: + name = net.get("name", "") + attached = net.get("containers") or [] + if not attached and name not in _BUILTIN_NETWORKS: + unused_networks.append(net) + except Exception as e: + logger.debug("Could not fetch networks for prune preview: %s", e) + lines = ["system_prune — preview (nothing deleted yet):", ""] lines.append( f" Dangling/unused images: {len(dangling_images)} ({_human_size(dangling_size)})" @@ -149,7 +169,12 @@ def register_system(mcp: FastMCP, config: AppConfig, client: DsmClient) -> None: if len(stopped_containers) > 10: lines.append(f" … and {len(stopped_containers) - 10} more") - lines.append(" Unused networks: (not counted — run prune to remove)") + lines.append(f" Unused networks: {len(unused_networks)}") + for net in unused_networks[:10]: + driver = net.get("driver", "?") + lines.append(f" - {net.get('name', '?')} ({driver})") + if len(unused_networks) > 10: + lines.append(f" … and {len(unused_networks) - 10} more") lines.append("") lines.append( f"Call system_prune(confirmed=True) to free ~{_human_size(dangling_size)}." diff --git a/tests/test_dsm_client.py b/tests/test_dsm_client.py index a393336..e20b9c2 100644 --- a/tests/test_dsm_client.py +++ b/tests/test_dsm_client.py @@ -563,6 +563,46 @@ async def test_build_stream_read_timeout_swallowed() -> None: assert result is None +@pytest.mark.asyncio +async def test_build_stream_connect_error_raises_synology_error() -> None: + """M-4: ConnectError (DSM unreachable) must surface as SynologyError, not raw httpx.""" + async with DsmClient(base_url="https://nas.local:443") as client: + mark_initialized(client) + client._http = AsyncMock() + + ctx = MagicMock() + ctx.__aenter__ = AsyncMock(side_effect=httpx.ConnectError("nas offline")) + ctx.__aexit__ = AsyncMock(return_value=None) + client._http.stream = MagicMock(return_value=ctx) + + with pytest.raises(SynologyError) as exc_info: + await client.trigger_build_stream("proj-1") + + msg = str(exc_info.value) + assert "transport error" in msg + assert "ConnectError" in msg + # Cause is suppressed via `from None` to keep error message clean. + assert exc_info.value.__cause__ is None + + +@pytest.mark.asyncio +async def test_build_stream_remote_protocol_error_raises_synology_error() -> None: + """M-4: RemoteProtocolError (broken response framing) is also converted.""" + async with DsmClient(base_url="https://nas.local:443") as client: + mark_initialized(client) + client._http = AsyncMock() + + ctx = MagicMock() + ctx.__aenter__ = AsyncMock(side_effect=httpx.RemoteProtocolError("server disconnected")) + ctx.__aexit__ = AsyncMock(return_value=None) + client._http.stream = MagicMock(return_value=ctx) + + with pytest.raises(SynologyError) as exc_info: + await client.trigger_build_stream("proj-1") + + assert "RemoteProtocolError" in str(exc_info.value) + + @pytest.mark.asyncio async def test_build_stream_http_500_scrubs_sid() -> None: async with DsmClient(base_url="https://nas.local:443") as client: diff --git a/tests/test_modules/test_projects.py b/tests/test_modules/test_projects.py index 94a76c0..b77c241 100644 --- a/tests/test_modules/test_projects.py +++ b/tests/test_modules/test_projects.py @@ -381,3 +381,139 @@ async def test_redeploy_unknown_status_returns_error(): assert "UPDATING" in result assert "Workaround" in result or "stop_project" in result + + +# ────────────────────────────────────────────────────────────────────── +# M-4: clear recovery hint when build_stream fails after stop succeeded +# ────────────────────────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_redeploy_build_stream_transport_error_shows_stopped_recovery_hint(): + """M-4: build_stream transport error after RUNNING-stop must tell the user the + project is now STOPPED and recommend start_project / retry.""" + from mcp_synology_container.dsm_client import SynologyError + + client, calls = make_stateful_redeploy_mock( + "RUNNING", + build_stream_raises=SynologyError( + "build_stream transport error: ConnectError: nas offline", code=0 + ), + ) + tools = make_projects_tools(client) + + with patch("mcp_synology_container.modules.projects.asyncio.sleep"): + result = await tools["redeploy_project"]("myapp", confirmed=True) + + # No raw stack trace — clean message + assert "transport error" in result + assert "ConnectError" in result + # The recovery hint must point at the actual situation + assert "STOPPED" in result + assert "start_project" in result + # Old misleading workaround text must NOT appear + assert "stop_project + start_project separately" not in result + + +@pytest.mark.asyncio +async def test_redeploy_build_stream_error_on_stopped_project_keeps_old_workaround(): + """If the project was STOPPED to begin with, no stop was issued, so the + 'STOPPED recovery' hint is NOT appropriate — keep the original workaround.""" + from mcp_synology_container.dsm_client import SynologyError + + client, calls = make_stateful_redeploy_mock( + "STOPPED", + build_stream_raises=SynologyError("build failed", code=114), + ) + tools = make_projects_tools(client) + + with patch("mcp_synology_container.modules.projects.asyncio.sleep"): + result = await tools["redeploy_project"]("myapp", confirmed=True) + + assert "build failed" in result or "Error during redeploy" in result + # Stop was never issued; new recovery hint should not appear + assert "was stopped before this error" not in result + + +# ────────────────────────────────────────────────────────────────────── +# M-5: polling exits early on BUILD_FAILED / ERROR +# ────────────────────────────────────────────────────────────────────── + + +@pytest.mark.asyncio +async def test_wait_for_project_running_returns_early_on_build_failed(): + """_wait_for_project_running must exit as soon as DSM reports BUILD_FAILED, + not wait the full timeout.""" + from mcp_synology_container.modules.projects import _wait_for_project_running + + client = AsyncMock() + + async def mock_request(api, method, **kwargs): + if method == "list": + return project_list("BUILD_FAILED") + return {} + + client.request.side_effect = mock_request + + with patch("mcp_synology_container.modules.projects.asyncio.sleep"): + # 100s timeout, 2s interval — if the early-exit isn't there the test + # would still terminate quickly because sleep is mocked, but the call + # count assertion below catches a non-exiting loop. + result = await _wait_for_project_running(client, "myapp", timeout=100, interval=2) + + assert result == "BUILD_FAILED" + # Only a few list() calls — exit was on the first poll iteration. + list_calls = [c for c in client.request.call_args_list if c.args[1] == "list"] + assert len(list_calls) <= 2 + + +@pytest.mark.asyncio +async def test_wait_for_project_running_returns_early_on_error(): + from mcp_synology_container.modules.projects import _wait_for_project_running + + client = AsyncMock() + + async def mock_request(api, method, **kwargs): + if method == "list": + return project_list("ERROR") + return {} + + client.request.side_effect = mock_request + + with patch("mcp_synology_container.modules.projects.asyncio.sleep"): + result = await _wait_for_project_running(client, "myapp", timeout=100, interval=2) + + assert result == "ERROR" + + +@pytest.mark.asyncio +async def test_redeploy_surfaces_build_failed_with_hint(): + """When polling reports BUILD_FAILED, redeploy_project must include a clear + hint to inspect the image tag and retry.""" + client = AsyncMock() + build_done = False + + async def mock_request(api, method, **kwargs): + if method == "list": + return project_list("BUILD_FAILED") if build_done else project_list("RUNNING") + return {} + + async def mock_build_stream(project_id): + nonlocal build_done + build_done = True + + client.request.side_effect = mock_request + client.trigger_build_stream = AsyncMock(side_effect=mock_build_stream) + tools = make_projects_tools(client) + + with patch("mcp_synology_container.modules.projects.asyncio.sleep"): + result = await tools["redeploy_project"]("myapp", confirmed=True) + + assert "Redeploy failed" in result + assert "BUILD_FAILED" in result + assert "update_image_tag" in result + assert "redeployed successfully" not in result + # Polling must have exited early, not run to the full timeout. + list_calls = [c for c in client.request.call_args_list if c.args[1] == "list"] + # Generous upper bound — early exit means handful of polls, not hundreds. + assert len(list_calls) <= 5 diff --git a/tests/test_modules/test_system.py b/tests/test_modules/test_system.py index 5ee6864..7d51906 100644 --- a/tests/test_modules/test_system.py +++ b/tests/test_modules/test_system.py @@ -305,3 +305,92 @@ async def test_system_prune_api_error(): result = await tools["system_prune"](confirmed=True) assert "Error" in result + + +# ────────────────────────────────────────────────────────────────────── +# M-6: system_prune preview now counts unused networks +# ────────────────────────────────────────────────────────────────────── + + +SAMPLE_NETWORKS_FOR_PRUNE = { + "network": [ + # User-created, no containers attached → will be pruned + {"name": "orphan_net", "driver": "bridge", "containers": []}, + # User-created, in use → must NOT be counted + { + "name": "myapp_default", + "driver": "bridge", + "containers": ["web", "db"], + }, + # Built-in networks: Docker never prunes these even if empty + {"name": "bridge", "driver": "bridge", "containers": []}, + {"name": "host", "driver": "host", "containers": []}, + {"name": "none", "driver": "null", "containers": []}, + # Another user-created empty network + {"name": "legacy_net", "driver": "bridge", "containers": []}, + ] +} + + +@pytest.mark.asyncio +async def test_system_prune_preview_counts_unused_networks() -> None: + """M-6: preview must enumerate user-created networks with no containers, + skipping the three built-in networks (bridge/host/none).""" + from mcp_synology_container.modules.system import register_system + + client = AsyncMock() + + async def mock_request(api, method, **kwargs): + if api == "SYNO.Docker.Image": + return SAMPLE_IMAGES + if api == "SYNO.Docker.Container": + return SAMPLE_CONTAINERS + if api == "SYNO.Docker.Network": + return SAMPLE_NETWORKS_FOR_PRUNE + return {} + + client.request.side_effect = mock_request + mcp, tools = make_mock_mcp() + register_system(mcp, make_config(), client) + + result = await tools["system_prune"]() + + # Two unused user-created networks; the three built-ins must not appear. + assert "Unused networks: 2" in result + assert "orphan_net" in result + assert "legacy_net" in result + # Built-in network names must not appear in the prune preview. + assert " - bridge " not in result + assert " - host " not in result + assert " - none " not in result + # Network with containers must not be listed. + assert "myapp_default" not in result + # Old "not counted" placeholder must be gone. + assert "not counted" not in result + + +@pytest.mark.asyncio +async def test_system_prune_preview_network_fetch_failure_is_nonfatal() -> None: + """If the network list fetch fails, the preview still works (0 networks).""" + from mcp_synology_container.dsm_client import SynologyError + from mcp_synology_container.modules.system import register_system + + client = AsyncMock() + + async def mock_request(api, method, **kwargs): + if api == "SYNO.Docker.Image": + return SAMPLE_IMAGES + if api == "SYNO.Docker.Container": + return SAMPLE_CONTAINERS + if api == "SYNO.Docker.Network": + raise SynologyError("network list failed", code=100) + return {} + + client.request.side_effect = mock_request + mcp, tools = make_mock_mcp() + register_system(mcp, make_config(), client) + + result = await tools["system_prune"]() + # Preview still renders; networks count falls back to 0. + assert "preview" in result.lower() + assert "Unused networks: 0" in result diff --git a/uv.lock b/uv.lock index ad244f4..943a16e 100644 --- a/uv.lock +++ b/uv.lock @@ -362,7 +362,7 @@ wheels = [ [[package]] name = "mcp-synology-container" -version = "0.2.9" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "click" },