From c848ebc8ce4b7451ce7105067cb3c4fc83645bc7 Mon Sep 17 00:00:00 2001 From: pashpashpash Date: Sun, 12 Apr 2026 18:52:22 -0700 Subject: [PATCH] agents: split GPT-5 prompt and retry behavior (#65597) * agents: split GPT-5 prompt and retry behavior * agents: fix GPT-5 review follow-ups * agents: address GPT-5 review follow-ups * agents: avoid replaying side-effectful GPT retries * agents: mark subagent control as mutating * agents: fail closed on single-action retries * commands: stabilize channel legacy doctor migration test * agents: narrow single-action retry promise trigger --- extensions/openai/index.test.ts | 28 ++- extensions/openai/prompt-overlay.ts | 23 ++- .../run.incomplete-turn.test.ts | 160 +++++++++++++++++- .../pi-embedded-runner/run/incomplete-turn.ts | 69 +++++++- ...-embedded-subscribe.handlers.tools.test.ts | 65 +++++++ src/agents/tool-mutation.test.ts | 8 + src/agents/tool-mutation.ts | 3 + .../channel-legacy-config-migrate.test.ts | 13 +- 8 files changed, 352 insertions(+), 17 deletions(-) diff --git a/extensions/openai/index.test.ts b/extensions/openai/index.test.ts index 320e83c1bbb..61be535cfb8 100644 --- a/extensions/openai/index.test.ts +++ b/extensions/openai/index.test.ts @@ -14,6 +14,7 @@ import { OPENAI_FRIENDLY_PROMPT_OVERLAY, OPENAI_GPT5_EXECUTION_BIAS, OPENAI_GPT5_OUTPUT_CONTRACT, + OPENAI_GPT5_TOOL_CALL_STYLE, } from "./prompt-overlay.js"; const runtimeMocks = vi.hoisted(() => ({ @@ -365,7 +366,7 @@ describe("openai plugin", () => { }; expect(openaiProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({ - stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT, + stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"), sectionOverrides: { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY, execution_bias: OPENAI_GPT5_EXECUTION_BIAS, @@ -382,7 +383,7 @@ describe("openai plugin", () => { "Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.", ); expect(codexProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({ - stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT, + stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"), sectionOverrides: { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY, execution_bias: OPENAI_GPT5_EXECUTION_BIAS, @@ -454,9 +455,22 @@ describe("openai plugin", () => { expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain( "Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.", ); + expect(OPENAI_GPT5_EXECUTION_BIAS).toContain( + "Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.", + ); + expect(OPENAI_GPT5_EXECUTION_BIAS).toContain( + "If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.", + ); expect(OPENAI_GPT5_EXECUTION_BIAS).toContain( "Do prerequisite lookup or discovery before dependent actions.", ); + expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain( + "Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.", + ); + expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain( + "When a first-class tool exists for an action, use the tool instead of asking the user to run a command.", + ); + expect(OPENAI_GPT5_TOOL_CALL_STYLE).not.toContain("/approve"); expect(OPENAI_GPT5_OUTPUT_CONTRACT).toContain( "Return the requested sections only, in the requested order.", ); @@ -486,7 +500,7 @@ describe("openai plugin", () => { agentId: undefined, }), ).toEqual({ - stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT, + stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"), sectionOverrides: { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY, execution_bias: OPENAI_GPT5_EXECUTION_BIAS, @@ -514,7 +528,7 @@ describe("openai plugin", () => { agentId: undefined, }), ).toEqual({ - stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT, + stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"), sectionOverrides: { execution_bias: OPENAI_GPT5_EXECUTION_BIAS, }, @@ -540,7 +554,7 @@ describe("openai plugin", () => { agentId: undefined, }), ).toEqual({ - stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT, + stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"), sectionOverrides: { execution_bias: OPENAI_GPT5_EXECUTION_BIAS, }, @@ -567,7 +581,7 @@ describe("openai plugin", () => { agentId: undefined, }), ).toEqual({ - stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT, + stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"), sectionOverrides: { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY, execution_bias: OPENAI_GPT5_EXECUTION_BIAS, @@ -594,7 +608,7 @@ describe("openai plugin", () => { agentId: undefined, }), ).toEqual({ - stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT, + stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"), sectionOverrides: { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY, execution_bias: OPENAI_GPT5_EXECUTION_BIAS, diff --git a/extensions/openai/prompt-overlay.ts b/extensions/openai/prompt-overlay.ts index c323f50916f..89ee56f6621 100644 --- a/extensions/openai/prompt-overlay.ts +++ b/extensions/openai/prompt-overlay.ts @@ -64,11 +64,20 @@ Do not use em dashes unless the user explicitly asks for them or they are requir export const OPENAI_GPT5_EXECUTION_BIAS = `## Execution Bias -Start the real work in the same turn when the next step is clear. +Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply. +Commentary-only turns are incomplete when tools are available and the next action is clear. +If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission. Do prerequisite lookup or discovery before dependent actions. -If another tool call would likely improve correctness or completeness, keep going instead of stopping at partial progress. Multi-part requests stay incomplete until every requested item is handled or clearly marked blocked. -Before the final answer, quickly verify correctness, coverage, formatting, and obvious side effects.`; +Act first, then verify if needed. Do not pause to summarize or verify before taking the next action.`; + +export const OPENAI_GPT5_TOOL_CALL_STYLE = `## Tool Call Style + +Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call. +When a first-class tool exists for an action, use the tool instead of asking the user to run a command. +If multiple tool calls are needed, call them in sequence without stopping to explain between calls. +Default: do not narrate routine, low-risk tool calls (just call the tool). +Narrate only when it genuinely helps: complex multi-step work, sensitive actions like deletions, or when the user explicitly asks for commentary.`; export type OpenAIPromptOverlayMode = "friendly" | "off"; @@ -103,8 +112,14 @@ export function resolveOpenAISystemPromptContribution(params: { ) { return undefined; } + // tool_call_style is NOT overridden via sectionOverrides because the + // default section includes dynamic channel-specific approval guidance + // from buildExecApprovalPromptGuidance() that varies per runtime + // channel. Overriding it with a static string would lose that dynamic + // content. Instead, the tool-first reinforcement lives in stablePrefix + // so it's always present alongside the default tool_call_style section. return { - stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT, + stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"), sectionOverrides: { execution_bias: OPENAI_GPT5_EXECUTION_BIAS, ...(params.mode === "friendly" ? { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY } : {}), diff --git a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts index 9093aee5a3e..b38283370f5 100644 --- a/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts +++ b/src/agents/pi-embedded-runner/run.incomplete-turn.test.ts @@ -10,8 +10,10 @@ import { resetRunOverflowCompactionHarnessMocks, } from "./run.overflow-compaction.harness.js"; import { + buildAttemptReplayMetadata, extractPlanningOnlyPlanDetails, isLikelyExecutionAckPrompt, + PLANNING_ONLY_RETRY_INSTRUCTION, resolveAckExecutionFastPathInstruction, resolvePlanningOnlyRetryLimit, resolvePlanningOnlyRetryInstruction, @@ -281,7 +283,10 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { timedOut: false, attempt: makeAttemptResult({ assistantTexts: ["I'll inspect the code, make the change, and run the checks."], - toolMetas: [{ toolName: "bash", meta: "ls" }], + toolMetas: [ + { toolName: "read", meta: "path=src/index.ts" }, + { toolName: "search", meta: "pattern=runEmbeddedPiAgent" }, + ], }), }); @@ -436,3 +441,156 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => { ).toBe("paused"); }); }); + +describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => { + const openaiParams = { provider: "openai", modelId: "gpt-5.4" } as const; + + function makeAttemptWithTools( + toolNames: string[], + assistantText: string, + ): Parameters[0]["attempt"] { + const toolMetas = toolNames.map((toolName) => ({ toolName })); + return { + toolMetas, + assistantTexts: [assistantText], + lastAssistant: { stopReason: "stop" }, + itemLifecycle: { startedCount: toolNames.length }, + replayMetadata: buildAttemptReplayMetadata({ + toolMetas, + didSendViaMessagingTool: false, + }), + clientToolCall: null, + yieldDetected: false, + didSendDeterministicApprovalPrompt: false, + didSendViaMessagingTool: false, + lastToolError: null, + } as unknown as Parameters[0]["attempt"]; + } + + it("retries when exactly 1 non-plan tool call plus 'i can do that' prose is detected", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["read"], "I can do that next."), + }); + + expect(result).toBe(PLANNING_ONLY_RETRY_INSTRUCTION); + }); + + it("retries when exactly 1 non-plan tool call plus planning prose is detected", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["read"], "I'll analyze the structure next."), + }); + + expect(result).toBe(PLANNING_ONLY_RETRY_INSTRUCTION); + }); + + it("does not retry when 2+ non-plan tool calls are present", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["read", "search"], "I'll verify the output."), + }); + + expect(result).toBeNull(); + }); + + it("does not retry when 1 tool call plus completion language is present", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["read"], "Done. The file looks correct."), + }); + + expect(result).toBeNull(); + }); + + it("does not retry when 1 tool call plus 'let me know' handoff is present", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["read"], "Let me know if you need anything else."), + }); + + expect(result).toBeNull(); + }); + + it("does not retry when 1 tool call plus an answer-style summary is present", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools( + ["read"], + "I'll summarize the root cause: the provider auth scope is missing.", + ), + }); + + expect(result).toBeNull(); + }); + + it("does not retry when 1 tool call plus a future-tense description is present", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools( + ["read"], + "I'll describe the issue: the provider auth scope is missing.", + ), + }); + + expect(result).toBeNull(); + }); + + it("does not retry when 1 safe tool call is followed by answer prose joined with 'and'", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["read"], "I'll explain and recommend a fix."), + }); + + expect(result).toBeNull(); + }); + + it("does not retry when 1 tool call plus a bare 'i can do that' reply is present", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["read"], "I can do that."), + }); + + expect(result).toBeNull(); + }); + + it("does not retry when the lone tool call already had side effects", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["sessions_spawn"], "I'll continue from there next."), + }); + + expect(result).toBeNull(); + }); + + it("does not retry when the lone tool call is unclassified", () => { + const result = resolvePlanningOnlyRetryInstruction({ + ...openaiParams, + aborted: false, + timedOut: false, + attempt: makeAttemptWithTools(["vendor_widget"], "I'll continue from there next."), + }); + + expect(result).toBeNull(); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/incomplete-turn.ts b/src/agents/pi-embedded-runner/run/incomplete-turn.ts index 06962c7e66d..d6266cf4d7a 100644 --- a/src/agents/pi-embedded-runner/run/incomplete-turn.ts +++ b/src/agents/pi-embedded-runner/run/incomplete-turn.ts @@ -54,6 +54,21 @@ const PLANNING_ONLY_COMPLETION_RE = /\b(?:done|finished|implemented|updated|fixed|changed|ran|verified|found|here(?:'s| is) what|blocked by|the blocker is)\b/i; const PLANNING_ONLY_HEADING_RE = /^(?:plan|steps?|next steps?)\s*:/i; const PLANNING_ONLY_BULLET_RE = /^(?:[-*•]\s+|\d+[.)]\s+)/u; +const PLANNING_ONLY_MAX_VISIBLE_TEXT = 700; +const SINGLE_ACTION_EXPLICIT_CONTINUATION_RE = + /\b(?:going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|then[, ]+i(?:'ll| will)|i can do that next|let me (?!know\b)\w+(?:\s+\w+){0,3}\s+(?:next|then|first)\b)/i; +const SINGLE_ACTION_MULTI_STEP_PROMISE_RE = + /\bi(?:'ll| will)\b(?=[^.!?]{0,160}\b(?:next|then|after(?:wards)?|once)\b)/i; +const SINGLE_ACTION_RESULT_STYLE_RE = + /\b(?:i(?:'ll| will)\s+(?:summarize|explain|share|show|report|describe|clarify|answer|recap)(?:\s+\w+){0,4}\s*:|(?:here(?:'s| is)|summary|result|answer|findings?|root cause)\s*:)/i; +const SINGLE_ACTION_RETRY_SAFE_TOOL_NAMES = new Set([ + "read", + "search", + "find", + "grep", + "glob", + "ls", +]); const DEFAULT_PLANNING_ONLY_RETRY_LIMIT = 1; const STRICT_AGENTIC_PLANNING_ONLY_RETRY_LIMIT = 2; const ACK_EXECUTION_NORMALIZED_SET = new Set([ @@ -284,10 +299,51 @@ function countPlanOnlyToolMetas(toolMetas: PlanningOnlyAttempt["toolMetas"]): nu return toolMetas.filter((entry) => entry.toolName === "update_plan").length; } +function countNonPlanToolCalls(toolMetas: PlanningOnlyAttempt["toolMetas"]): number { + return toolMetas.filter((entry) => entry.toolName !== "update_plan").length; +} + function hasNonPlanToolActivity(toolMetas: PlanningOnlyAttempt["toolMetas"]): boolean { return toolMetas.some((entry) => entry.toolName !== "update_plan"); } +function hasSingleRetrySafeNonPlanTool(toolMetas: PlanningOnlyAttempt["toolMetas"]): boolean { + const nonPlanToolNames = toolMetas + .map((entry) => normalizeLowercaseStringOrEmpty(entry.toolName)) + .filter((toolName) => toolName && toolName !== "update_plan"); + return ( + nonPlanToolNames.length === 1 && + SINGLE_ACTION_RETRY_SAFE_TOOL_NAMES.has(nonPlanToolNames[0] ?? "") + ); +} + +/** + * Treat a turn with exactly one non-plan tool call plus visible "I'll do X + * next" prose as effectively planning-only from the user's perspective. This + * closes the one-action-then-narrative loophole without changing the 2+ tool + * call path, which still counts as real multi-step progress. + */ +function isSingleActionThenNarrativePattern(params: { + toolMetas: PlanningOnlyAttempt["toolMetas"]; + assistantTexts: readonly string[]; +}): boolean { + const nonPlanCount = countNonPlanToolCalls(params.toolMetas); + if (nonPlanCount !== 1) { + return false; + } + const text = params.assistantTexts.join("\n\n").trim(); + if (!text || text.length > PLANNING_ONLY_MAX_VISIBLE_TEXT) { + return false; + } + if (SINGLE_ACTION_RESULT_STYLE_RE.test(text)) { + return false; + } + return ( + SINGLE_ACTION_EXPLICIT_CONTINUATION_RE.test(text) || + SINGLE_ACTION_MULTI_STEP_PROMISE_RE.test(text) + ); +} + export function resolvePlanningOnlyRetryLimit( executionContract?: EmbeddedPiExecutionContract, ): number { @@ -304,6 +360,12 @@ export function resolvePlanningOnlyRetryInstruction(params: { attempt: PlanningOnlyAttempt; }): string | null { const planOnlyToolMetaCount = countPlanOnlyToolMetas(params.attempt.toolMetas); + const singleActionNarrative = isSingleActionThenNarrativePattern({ + toolMetas: params.attempt.toolMetas, + assistantTexts: params.attempt.assistantTexts, + }); + const allowSingleActionRetryBypass = + singleActionNarrative && hasSingleRetrySafeNonPlanTool(params.attempt.toolMetas); if ( !shouldApplyPlanningOnlyRetryGuard({ provider: params.provider, @@ -316,8 +378,9 @@ export function resolvePlanningOnlyRetryInstruction(params: { params.attempt.didSendDeterministicApprovalPrompt || params.attempt.didSendViaMessagingTool || params.attempt.lastToolError || - hasNonPlanToolActivity(params.attempt.toolMetas) || - params.attempt.itemLifecycle.startedCount > planOnlyToolMetaCount || + (hasNonPlanToolActivity(params.attempt.toolMetas) && !allowSingleActionRetryBypass) || + (params.attempt.itemLifecycle.startedCount > planOnlyToolMetaCount && + !allowSingleActionRetryBypass) || params.attempt.replayMetadata.hadPotentialSideEffects ) { return null; @@ -329,7 +392,7 @@ export function resolvePlanningOnlyRetryInstruction(params: { } const text = params.attempt.assistantTexts.join("\n\n").trim(); - if (!text || text.length > 700 || text.includes("```")) { + if (!text || text.length > PLANNING_ONLY_MAX_VISIBLE_TEXT || text.includes("```")) { return null; } if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningOnlyFormat(text)) { diff --git a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts index d866ff4a146..661f0d4ebe1 100644 --- a/src/agents/pi-embedded-subscribe.handlers.tools.test.ts +++ b/src/agents/pi-embedded-subscribe.handlers.tools.test.ts @@ -283,6 +283,71 @@ describe("handleToolExecutionEnd mutating failure recovery", () => { }); }); + it("marks successful subagents control actions as replay-invalid", async () => { + const { ctx } = createTestContext(); + + await handleToolExecutionStart( + ctx as never, + { + type: "tool_execution_start", + toolName: "subagents", + toolCallId: "tool-subagents-kill", + args: { + action: "kill", + target: "worker-1", + }, + } as never, + ); + + await handleToolExecutionEnd( + ctx as never, + { + type: "tool_execution_end", + toolName: "subagents", + toolCallId: "tool-subagents-kill", + isError: false, + result: { status: "ok", action: "kill", target: "worker-1" }, + } as never, + ); + + expect(ctx.state.replayState).toEqual({ + replayInvalid: true, + hadPotentialSideEffects: true, + }); + }); + + it("keeps read-only subagents list actions replay-safe", async () => { + const { ctx } = createTestContext(); + + await handleToolExecutionStart( + ctx as never, + { + type: "tool_execution_start", + toolName: "subagents", + toolCallId: "tool-subagents-list", + args: { + action: "list", + }, + } as never, + ); + + await handleToolExecutionEnd( + ctx as never, + { + type: "tool_execution_end", + toolName: "subagents", + toolCallId: "tool-subagents-list", + isError: false, + result: { status: "ok", action: "list", total: 0, text: "no active subagents." }, + } as never, + ); + + expect(ctx.state.replayState).toEqual({ + replayInvalid: false, + hadPotentialSideEffects: false, + }); + }); + it("keeps successful mutating retries replay-invalid after an earlier tool failure", async () => { const { ctx } = createTestContext(); diff --git a/src/agents/tool-mutation.test.ts b/src/agents/tool-mutation.test.ts index 9ae7f7148e2..780b4dbd749 100644 --- a/src/agents/tool-mutation.test.ts +++ b/src/agents/tool-mutation.test.ts @@ -58,6 +58,13 @@ describe("tool mutation helpers", () => { buildToolMutationState("message", { action: "send", to: "telegram:1" }).mutatingAction, ).toBe(true); expect(buildToolMutationState("browser", { action: "list" }).mutatingAction).toBe(false); + expect( + buildToolMutationState("subagents", { action: "kill", target: "worker-1" }).mutatingAction, + ).toBe(true); + expect( + buildToolMutationState("subagents", { action: "steer", target: "worker-1" }).mutatingAction, + ).toBe(true); + expect(buildToolMutationState("subagents", { action: "list" }).mutatingAction).toBe(false); }); it("matches tool actions by fingerprint and fails closed on asymmetric data", () => { @@ -82,6 +89,7 @@ describe("tool mutation helpers", () => { }); it("keeps legacy name-only mutating heuristics for payload fallback", () => { + expect(isLikelyMutatingToolName("sessions_spawn")).toBe(true); expect(isLikelyMutatingToolName("sessions_send")).toBe(true); expect(isLikelyMutatingToolName("browser_actions")).toBe(true); expect(isLikelyMutatingToolName("message_slack")).toBe(true); diff --git a/src/agents/tool-mutation.ts b/src/agents/tool-mutation.ts index d9349ba4654..65ee228856e 100644 --- a/src/agents/tool-mutation.ts +++ b/src/agents/tool-mutation.ts @@ -12,6 +12,7 @@ const MUTATING_TOOL_NAMES = new Set([ "bash", "process", "message", + "sessions_spawn", "sessions_send", "cron", "gateway", @@ -129,6 +130,8 @@ export function isMutatingToolCall(toolName: string, args: unknown): boolean { typeof record?.content === "string" || typeof record?.message === "string" ); + case "subagents": + return action === "kill" || action === "steer"; case "session_status": return typeof record?.model === "string" && record.model.trim().length > 0; default: { diff --git a/src/commands/doctor/shared/channel-legacy-config-migrate.test.ts b/src/commands/doctor/shared/channel-legacy-config-migrate.test.ts index aba988ba9f9..b8a68c1d58b 100644 --- a/src/commands/doctor/shared/channel-legacy-config-migrate.test.ts +++ b/src/commands/doctor/shared/channel-legacy-config-migrate.test.ts @@ -1,5 +1,4 @@ -import { describe, expect, it, vi } from "vitest"; -import { applyChannelDoctorCompatibilityMigrations } from "./channel-legacy-config-migrate.js"; +import { beforeAll, describe, expect, it, vi } from "vitest"; const applyPluginDoctorCompatibilityMigrations = vi.hoisted(() => vi.fn()); @@ -8,6 +7,16 @@ vi.mock("../../../plugins/doctor-contract-registry.js", () => ({ applyPluginDoctorCompatibilityMigrations(...args), })); +let applyChannelDoctorCompatibilityMigrations: typeof import("./channel-legacy-config-migrate.js").applyChannelDoctorCompatibilityMigrations; + +beforeAll(async () => { + // Commands runs on the shared non-isolated worker, so reload after installing + // this file's mock to avoid inheriting a cached real registry import. + vi.resetModules(); + ({ applyChannelDoctorCompatibilityMigrations } = + await import("./channel-legacy-config-migrate.js")); +}); + describe("bundled channel legacy config migrations", () => { it("normalizes legacy private-network aliases exposed through bundled contract surfaces", () => { applyPluginDoctorCompatibilityMigrations.mockReturnValueOnce({