From c848ebc8ce4b7451ce7105067cb3c4fc83645bc7 Mon Sep 17 00:00:00 2001
From: pashpashpash <nik@vault77.ai>
Date: Sun, 12 Apr 2026 18:52:22 -0700
Subject: [PATCH] agents: split GPT-5 prompt and retry behavior (#65597)

* agents: split GPT-5 prompt and retry behavior

* agents: fix GPT-5 review follow-ups

* agents: address GPT-5 review follow-ups

* agents: avoid replaying side-effectful GPT retries

* agents: mark subagent control as mutating

* agents: fail closed on single-action retries

* commands: stabilize channel legacy doctor migration test

* agents: narrow single-action retry promise trigger
---
 extensions/openai/index.test.ts               |  28 ++-
 extensions/openai/prompt-overlay.ts           |  23 ++-
 .../run.incomplete-turn.test.ts               | 160 +++++++++++++++++-
 .../pi-embedded-runner/run/incomplete-turn.ts |  69 +++++++-
 ...-embedded-subscribe.handlers.tools.test.ts |  65 +++++++
 src/agents/tool-mutation.test.ts              |   8 +
 src/agents/tool-mutation.ts                   |   3 +
 .../channel-legacy-config-migrate.test.ts     |  13 +-
 8 files changed, 352 insertions(+), 17 deletions(-)

diff --git a/extensions/openai/index.test.ts b/extensions/openai/index.test.ts
index 320e83c1bbb..61be535cfb8 100644
--- a/extensions/openai/index.test.ts
+++ b/extensions/openai/index.test.ts
@@ -14,6 +14,7 @@ import {
   OPENAI_FRIENDLY_PROMPT_OVERLAY,
   OPENAI_GPT5_EXECUTION_BIAS,
   OPENAI_GPT5_OUTPUT_CONTRACT,
+  OPENAI_GPT5_TOOL_CALL_STYLE,
 } from "./prompt-overlay.js";
 
 const runtimeMocks = vi.hoisted(() => ({
@@ -365,7 +366,7 @@ describe("openai plugin", () => {
     };
 
     expect(openaiProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -382,7 +383,7 @@ describe("openai plugin", () => {
       "Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
     );
     expect(codexProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -454,9 +455,22 @@ describe("openai plugin", () => {
     expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
       "Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
     );
+    expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
+      "Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.",
+    );
+    expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
+      "If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.",
+    );
     expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
       "Do prerequisite lookup or discovery before dependent actions.",
     );
+    expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
+      "Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.",
+    );
+    expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
+      "When a first-class tool exists for an action, use the tool instead of asking the user to run a command.",
+    );
+    expect(OPENAI_GPT5_TOOL_CALL_STYLE).not.toContain("/approve");
     expect(OPENAI_GPT5_OUTPUT_CONTRACT).toContain(
       "Return the requested sections only, in the requested order.",
     );
@@ -486,7 +500,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -514,7 +528,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
       },
@@ -540,7 +554,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
       },
@@ -567,7 +581,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
@@ -594,7 +608,7 @@ describe("openai plugin", () => {
         agentId: undefined,
       }),
     ).toEqual({
-      stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+      stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
       sectionOverrides: {
         interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
         execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
diff --git a/extensions/openai/prompt-overlay.ts b/extensions/openai/prompt-overlay.ts
index c323f50916f..89ee56f6621 100644
--- a/extensions/openai/prompt-overlay.ts
+++ b/extensions/openai/prompt-overlay.ts
@@ -64,11 +64,20 @@ Do not use em dashes unless the user explicitly asks for them or they are requir
 
 export const OPENAI_GPT5_EXECUTION_BIAS = `## Execution Bias
 
-Start the real work in the same turn when the next step is clear.
+Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.
+Commentary-only turns are incomplete when tools are available and the next action is clear.
+If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.
 Do prerequisite lookup or discovery before dependent actions.
-If another tool call would likely improve correctness or completeness, keep going instead of stopping at partial progress.
 Multi-part requests stay incomplete until every requested item is handled or clearly marked blocked.
-Before the final answer, quickly verify correctness, coverage, formatting, and obvious side effects.`;
+Act first, then verify if needed. Do not pause to summarize or verify before taking the next action.`;
+
+export const OPENAI_GPT5_TOOL_CALL_STYLE = `## Tool Call Style
+
+Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.
+When a first-class tool exists for an action, use the tool instead of asking the user to run a command.
+If multiple tool calls are needed, call them in sequence without stopping to explain between calls.
+Default: do not narrate routine, low-risk tool calls (just call the tool).
+Narrate only when it genuinely helps: complex multi-step work, sensitive actions like deletions, or when the user explicitly asks for commentary.`;
 
 export type OpenAIPromptOverlayMode = "friendly" | "off";
 
@@ -103,8 +112,14 @@ export function resolveOpenAISystemPromptContribution(params: {
   ) {
     return undefined;
   }
+  // tool_call_style is NOT overridden via sectionOverrides because the
+  // default section includes dynamic channel-specific approval guidance
+  // from buildExecApprovalPromptGuidance() that varies per runtime
+  // channel. Overriding it with a static string would lose that dynamic
+  // content. Instead, the tool-first reinforcement lives in stablePrefix
+  // so it's always present alongside the default tool_call_style section.
   return {
-    stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
+    stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
     sectionOverrides: {
       execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
       ...(params.mode === "friendly" ? { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY } : {}),
diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
index 9093aee5a3e..b38283370f5 100644
--- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
+++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts
@@ -10,8 +10,10 @@ import {
   resetRunOverflowCompactionHarnessMocks,
 } from "./run.overflow-compaction.harness.js";
 import {
+  buildAttemptReplayMetadata,
   extractPlanningOnlyPlanDetails,
   isLikelyExecutionAckPrompt,
+  PLANNING_ONLY_RETRY_INSTRUCTION,
   resolveAckExecutionFastPathInstruction,
   resolvePlanningOnlyRetryLimit,
   resolvePlanningOnlyRetryInstruction,
@@ -281,7 +283,10 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
       timedOut: false,
       attempt: makeAttemptResult({
         assistantTexts: ["I'll inspect the code, make the change, and run the checks."],
-        toolMetas: [{ toolName: "bash", meta: "ls" }],
+        toolMetas: [
+          { toolName: "read", meta: "path=src/index.ts" },
+          { toolName: "search", meta: "pattern=runEmbeddedPiAgent" },
+        ],
       }),
     });
 
@@ -436,3 +441,156 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
     ).toBe("paused");
   });
 });
+
+describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
+  const openaiParams = { provider: "openai", modelId: "gpt-5.4" } as const;
+
+  function makeAttemptWithTools(
+    toolNames: string[],
+    assistantText: string,
+  ): Parameters<typeof resolvePlanningOnlyRetryInstruction>[0]["attempt"] {
+    const toolMetas = toolNames.map((toolName) => ({ toolName }));
+    return {
+      toolMetas,
+      assistantTexts: [assistantText],
+      lastAssistant: { stopReason: "stop" },
+      itemLifecycle: { startedCount: toolNames.length },
+      replayMetadata: buildAttemptReplayMetadata({
+        toolMetas,
+        didSendViaMessagingTool: false,
+      }),
+      clientToolCall: null,
+      yieldDetected: false,
+      didSendDeterministicApprovalPrompt: false,
+      didSendViaMessagingTool: false,
+      lastToolError: null,
+    } as unknown as Parameters<typeof resolvePlanningOnlyRetryInstruction>[0]["attempt"];
+  }
+
+  it("retries when exactly 1 non-plan tool call plus 'i can do that' prose is detected", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "I can do that next."),
+    });
+
+    expect(result).toBe(PLANNING_ONLY_RETRY_INSTRUCTION);
+  });
+
+  it("retries when exactly 1 non-plan tool call plus planning prose is detected", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "I'll analyze the structure next."),
+    });
+
+    expect(result).toBe(PLANNING_ONLY_RETRY_INSTRUCTION);
+  });
+
+  it("does not retry when 2+ non-plan tool calls are present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read", "search"], "I'll verify the output."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus completion language is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "Done. The file looks correct."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus 'let me know' handoff is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "Let me know if you need anything else."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus an answer-style summary is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(
+        ["read"],
+        "I'll summarize the root cause: the provider auth scope is missing.",
+      ),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus a future-tense description is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(
+        ["read"],
+        "I'll describe the issue: the provider auth scope is missing.",
+      ),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 safe tool call is followed by answer prose joined with 'and'", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "I'll explain and recommend a fix."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when 1 tool call plus a bare 'i can do that' reply is present", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["read"], "I can do that."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when the lone tool call already had side effects", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["sessions_spawn"], "I'll continue from there next."),
+    });
+
+    expect(result).toBeNull();
+  });
+
+  it("does not retry when the lone tool call is unclassified", () => {
+    const result = resolvePlanningOnlyRetryInstruction({
+      ...openaiParams,
+      aborted: false,
+      timedOut: false,
+      attempt: makeAttemptWithTools(["vendor_widget"], "I'll continue from there next."),
+    });
+
+    expect(result).toBeNull();
+  });
+});
diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
index 06962c7e66d..d6266cf4d7a 100644
--- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts
+++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts
@@ -54,6 +54,21 @@ const PLANNING_ONLY_COMPLETION_RE =
   /\b(?:done|finished|implemented|updated|fixed|changed|ran|verified|found|here(?:'s| is) what|blocked by|the blocker is)\b/i;
 const PLANNING_ONLY_HEADING_RE = /^(?:plan|steps?|next steps?)\s*:/i;
 const PLANNING_ONLY_BULLET_RE = /^(?:[-*•]\s+|\d+[.)]\s+)/u;
+const PLANNING_ONLY_MAX_VISIBLE_TEXT = 700;
+const SINGLE_ACTION_EXPLICIT_CONTINUATION_RE =
+  /\b(?:going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|then[, ]+i(?:'ll| will)|i can do that next|let me (?!know\b)\w+(?:\s+\w+){0,3}\s+(?:next|then|first)\b)/i;
+const SINGLE_ACTION_MULTI_STEP_PROMISE_RE =
+  /\bi(?:'ll| will)\b(?=[^.!?]{0,160}\b(?:next|then|after(?:wards)?|once)\b)/i;
+const SINGLE_ACTION_RESULT_STYLE_RE =
+  /\b(?:i(?:'ll| will)\s+(?:summarize|explain|share|show|report|describe|clarify|answer|recap)(?:\s+\w+){0,4}\s*:|(?:here(?:'s| is)|summary|result|answer|findings?|root cause)\s*:)/i;
+const SINGLE_ACTION_RETRY_SAFE_TOOL_NAMES = new Set([
+  "read",
+  "search",
+  "find",
+  "grep",
+  "glob",
+  "ls",
+]);
 const DEFAULT_PLANNING_ONLY_RETRY_LIMIT = 1;
 const STRICT_AGENTIC_PLANNING_ONLY_RETRY_LIMIT = 2;
 const ACK_EXECUTION_NORMALIZED_SET = new Set([
@@ -284,10 +299,51 @@ function countPlanOnlyToolMetas(toolMetas: PlanningOnlyAttempt["toolMetas"]): nu
   return toolMetas.filter((entry) => entry.toolName === "update_plan").length;
 }
 
+function countNonPlanToolCalls(toolMetas: PlanningOnlyAttempt["toolMetas"]): number {
+  return toolMetas.filter((entry) => entry.toolName !== "update_plan").length;
+}
+
 function hasNonPlanToolActivity(toolMetas: PlanningOnlyAttempt["toolMetas"]): boolean {
   return toolMetas.some((entry) => entry.toolName !== "update_plan");
 }
 
+function hasSingleRetrySafeNonPlanTool(toolMetas: PlanningOnlyAttempt["toolMetas"]): boolean {
+  const nonPlanToolNames = toolMetas
+    .map((entry) => normalizeLowercaseStringOrEmpty(entry.toolName))
+    .filter((toolName) => toolName && toolName !== "update_plan");
+  return (
+    nonPlanToolNames.length === 1 &&
+    SINGLE_ACTION_RETRY_SAFE_TOOL_NAMES.has(nonPlanToolNames[0] ?? "")
+  );
+}
+
+/**
+ * Treat a turn with exactly one non-plan tool call plus visible "I'll do X
+ * next" prose as effectively planning-only from the user's perspective. This
+ * closes the one-action-then-narrative loophole without changing the 2+ tool
+ * call path, which still counts as real multi-step progress.
+ */
+function isSingleActionThenNarrativePattern(params: {
+  toolMetas: PlanningOnlyAttempt["toolMetas"];
+  assistantTexts: readonly string[];
+}): boolean {
+  const nonPlanCount = countNonPlanToolCalls(params.toolMetas);
+  if (nonPlanCount !== 1) {
+    return false;
+  }
+  const text = params.assistantTexts.join("\n\n").trim();
+  if (!text || text.length > PLANNING_ONLY_MAX_VISIBLE_TEXT) {
+    return false;
+  }
+  if (SINGLE_ACTION_RESULT_STYLE_RE.test(text)) {
+    return false;
+  }
+  return (
+    SINGLE_ACTION_EXPLICIT_CONTINUATION_RE.test(text) ||
+    SINGLE_ACTION_MULTI_STEP_PROMISE_RE.test(text)
+  );
+}
+
 export function resolvePlanningOnlyRetryLimit(
   executionContract?: EmbeddedPiExecutionContract,
 ): number {
@@ -304,6 +360,12 @@ export function resolvePlanningOnlyRetryInstruction(params: {
   attempt: PlanningOnlyAttempt;
 }): string | null {
   const planOnlyToolMetaCount = countPlanOnlyToolMetas(params.attempt.toolMetas);
+  const singleActionNarrative = isSingleActionThenNarrativePattern({
+    toolMetas: params.attempt.toolMetas,
+    assistantTexts: params.attempt.assistantTexts,
+  });
+  const allowSingleActionRetryBypass =
+    singleActionNarrative && hasSingleRetrySafeNonPlanTool(params.attempt.toolMetas);
   if (
     !shouldApplyPlanningOnlyRetryGuard({
       provider: params.provider,
@@ -316,8 +378,9 @@ export function resolvePlanningOnlyRetryInstruction(params: {
     params.attempt.didSendDeterministicApprovalPrompt ||
     params.attempt.didSendViaMessagingTool ||
     params.attempt.lastToolError ||
-    hasNonPlanToolActivity(params.attempt.toolMetas) ||
-    params.attempt.itemLifecycle.startedCount > planOnlyToolMetaCount ||
+    (hasNonPlanToolActivity(params.attempt.toolMetas) && !allowSingleActionRetryBypass) ||
+    (params.attempt.itemLifecycle.startedCount > planOnlyToolMetaCount &&
+      !allowSingleActionRetryBypass) ||
     params.attempt.replayMetadata.hadPotentialSideEffects
   ) {
     return null;
@@ -329,7 +392,7 @@ export function resolvePlanningOnlyRetryInstruction(params: {
   }
 
   const text = params.attempt.assistantTexts.join("\n\n").trim();
-  if (!text || text.length > 700 || text.includes("```")) {
+  if (!text || text.length > PLANNING_ONLY_MAX_VISIBLE_TEXT || text.includes("```")) {
     return null;
   }
   if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningOnlyFormat(text)) {
diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
index d866ff4a146..661f0d4ebe1 100644
--- a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
+++ b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts
@@ -283,6 +283,71 @@ describe("handleToolExecutionEnd mutating failure recovery", () => {
     });
   });
 
+  it("marks successful subagents control actions as replay-invalid", async () => {
+    const { ctx } = createTestContext();
+
+    await handleToolExecutionStart(
+      ctx as never,
+      {
+        type: "tool_execution_start",
+        toolName: "subagents",
+        toolCallId: "tool-subagents-kill",
+        args: {
+          action: "kill",
+          target: "worker-1",
+        },
+      } as never,
+    );
+
+    await handleToolExecutionEnd(
+      ctx as never,
+      {
+        type: "tool_execution_end",
+        toolName: "subagents",
+        toolCallId: "tool-subagents-kill",
+        isError: false,
+        result: { status: "ok", action: "kill", target: "worker-1" },
+      } as never,
+    );
+
+    expect(ctx.state.replayState).toEqual({
+      replayInvalid: true,
+      hadPotentialSideEffects: true,
+    });
+  });
+
+  it("keeps read-only subagents list actions replay-safe", async () => {
+    const { ctx } = createTestContext();
+
+    await handleToolExecutionStart(
+      ctx as never,
+      {
+        type: "tool_execution_start",
+        toolName: "subagents",
+        toolCallId: "tool-subagents-list",
+        args: {
+          action: "list",
+        },
+      } as never,
+    );
+
+    await handleToolExecutionEnd(
+      ctx as never,
+      {
+        type: "tool_execution_end",
+        toolName: "subagents",
+        toolCallId: "tool-subagents-list",
+        isError: false,
+        result: { status: "ok", action: "list", total: 0, text: "no active subagents." },
+      } as never,
+    );
+
+    expect(ctx.state.replayState).toEqual({
+      replayInvalid: false,
+      hadPotentialSideEffects: false,
+    });
+  });
+
   it("keeps successful mutating retries replay-invalid after an earlier tool failure", async () => {
     const { ctx } = createTestContext();
 
diff --git a/src/agents/tool-mutation.test.ts b/src/agents/tool-mutation.test.ts
index 9ae7f7148e2..780b4dbd749 100644
--- a/src/agents/tool-mutation.test.ts
+++ b/src/agents/tool-mutation.test.ts
@@ -58,6 +58,13 @@ describe("tool mutation helpers", () => {
       buildToolMutationState("message", { action: "send", to: "telegram:1" }).mutatingAction,
     ).toBe(true);
     expect(buildToolMutationState("browser", { action: "list" }).mutatingAction).toBe(false);
+    expect(
+      buildToolMutationState("subagents", { action: "kill", target: "worker-1" }).mutatingAction,
+    ).toBe(true);
+    expect(
+      buildToolMutationState("subagents", { action: "steer", target: "worker-1" }).mutatingAction,
+    ).toBe(true);
+    expect(buildToolMutationState("subagents", { action: "list" }).mutatingAction).toBe(false);
   });
 
   it("matches tool actions by fingerprint and fails closed on asymmetric data", () => {
@@ -82,6 +89,7 @@ describe("tool mutation helpers", () => {
   });
 
   it("keeps legacy name-only mutating heuristics for payload fallback", () => {
+    expect(isLikelyMutatingToolName("sessions_spawn")).toBe(true);
     expect(isLikelyMutatingToolName("sessions_send")).toBe(true);
     expect(isLikelyMutatingToolName("browser_actions")).toBe(true);
     expect(isLikelyMutatingToolName("message_slack")).toBe(true);
diff --git a/src/agents/tool-mutation.ts b/src/agents/tool-mutation.ts
index d9349ba4654..65ee228856e 100644
--- a/src/agents/tool-mutation.ts
+++ b/src/agents/tool-mutation.ts
@@ -12,6 +12,7 @@ const MUTATING_TOOL_NAMES = new Set([
   "bash",
   "process",
   "message",
+  "sessions_spawn",
   "sessions_send",
   "cron",
   "gateway",
@@ -129,6 +130,8 @@ export function isMutatingToolCall(toolName: string, args: unknown): boolean {
         typeof record?.content === "string" ||
         typeof record?.message === "string"
       );
+    case "subagents":
+      return action === "kill" || action === "steer";
     case "session_status":
       return typeof record?.model === "string" && record.model.trim().length > 0;
     default: {
diff --git a/src/commands/doctor/shared/channel-legacy-config-migrate.test.ts b/src/commands/doctor/shared/channel-legacy-config-migrate.test.ts
index aba988ba9f9..b8a68c1d58b 100644
--- a/src/commands/doctor/shared/channel-legacy-config-migrate.test.ts
+++ b/src/commands/doctor/shared/channel-legacy-config-migrate.test.ts
@@ -1,5 +1,4 @@
-import { describe, expect, it, vi } from "vitest";
-import { applyChannelDoctorCompatibilityMigrations } from "./channel-legacy-config-migrate.js";
+import { beforeAll, describe, expect, it, vi } from "vitest";
 
 const applyPluginDoctorCompatibilityMigrations = vi.hoisted(() => vi.fn());
 
@@ -8,6 +7,16 @@ vi.mock("../../../plugins/doctor-contract-registry.js", () => ({
     applyPluginDoctorCompatibilityMigrations(...args),
 }));
 
+let applyChannelDoctorCompatibilityMigrations: typeof import("./channel-legacy-config-migrate.js").applyChannelDoctorCompatibilityMigrations;
+
+beforeAll(async () => {
+  // Commands runs on the shared non-isolated worker, so reload after installing
+  // this file's mock to avoid inheriting a cached real registry import.
+  vi.resetModules();
+  ({ applyChannelDoctorCompatibilityMigrations } =
+    await import("./channel-legacy-config-migrate.js"));
+});
+
 describe("bundled channel legacy config migrations", () => {
   it("normalizes legacy private-network aliases exposed through bundled contract surfaces", () => {
     applyPluginDoctorCompatibilityMigrations.mockReturnValueOnce({