mirror of
https://github.com/openclaw/openclaw.git
synced 2026-04-18 15:23:23 +02:00
agents: split GPT-5 prompt and retry behavior (#65597)
* agents: split GPT-5 prompt and retry behavior * agents: fix GPT-5 review follow-ups * agents: address GPT-5 review follow-ups * agents: avoid replaying side-effectful GPT retries * agents: mark subagent control as mutating * agents: fail closed on single-action retries * commands: stabilize channel legacy doctor migration test * agents: narrow single-action retry promise trigger
This commit is contained in:
@@ -14,6 +14,7 @@ import {
|
||||
OPENAI_FRIENDLY_PROMPT_OVERLAY,
|
||||
OPENAI_GPT5_EXECUTION_BIAS,
|
||||
OPENAI_GPT5_OUTPUT_CONTRACT,
|
||||
OPENAI_GPT5_TOOL_CALL_STYLE,
|
||||
} from "./prompt-overlay.js";
|
||||
|
||||
const runtimeMocks = vi.hoisted(() => ({
|
||||
@@ -365,7 +366,7 @@ describe("openai plugin", () => {
|
||||
};
|
||||
|
||||
expect(openaiProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
|
||||
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
|
||||
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
|
||||
sectionOverrides: {
|
||||
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
|
||||
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
|
||||
@@ -382,7 +383,7 @@ describe("openai plugin", () => {
|
||||
"Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
|
||||
);
|
||||
expect(codexProvider.resolveSystemPromptContribution?.(contributionContext)).toEqual({
|
||||
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
|
||||
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
|
||||
sectionOverrides: {
|
||||
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
|
||||
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
|
||||
@@ -454,9 +455,22 @@ describe("openai plugin", () => {
|
||||
expect(OPENAI_FRIENDLY_PROMPT_OVERLAY).toContain(
|
||||
"Occasional emoji are welcome when they fit naturally, especially for warmth or brief celebration; keep them sparse.",
|
||||
);
|
||||
expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
|
||||
"Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.",
|
||||
);
|
||||
expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
|
||||
"If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.",
|
||||
);
|
||||
expect(OPENAI_GPT5_EXECUTION_BIAS).toContain(
|
||||
"Do prerequisite lookup or discovery before dependent actions.",
|
||||
);
|
||||
expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
|
||||
"Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.",
|
||||
);
|
||||
expect(OPENAI_GPT5_TOOL_CALL_STYLE).toContain(
|
||||
"When a first-class tool exists for an action, use the tool instead of asking the user to run a command.",
|
||||
);
|
||||
expect(OPENAI_GPT5_TOOL_CALL_STYLE).not.toContain("/approve");
|
||||
expect(OPENAI_GPT5_OUTPUT_CONTRACT).toContain(
|
||||
"Return the requested sections only, in the requested order.",
|
||||
);
|
||||
@@ -486,7 +500,7 @@ describe("openai plugin", () => {
|
||||
agentId: undefined,
|
||||
}),
|
||||
).toEqual({
|
||||
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
|
||||
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
|
||||
sectionOverrides: {
|
||||
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
|
||||
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
|
||||
@@ -514,7 +528,7 @@ describe("openai plugin", () => {
|
||||
agentId: undefined,
|
||||
}),
|
||||
).toEqual({
|
||||
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
|
||||
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
|
||||
sectionOverrides: {
|
||||
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
|
||||
},
|
||||
@@ -540,7 +554,7 @@ describe("openai plugin", () => {
|
||||
agentId: undefined,
|
||||
}),
|
||||
).toEqual({
|
||||
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
|
||||
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
|
||||
sectionOverrides: {
|
||||
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
|
||||
},
|
||||
@@ -567,7 +581,7 @@ describe("openai plugin", () => {
|
||||
agentId: undefined,
|
||||
}),
|
||||
).toEqual({
|
||||
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
|
||||
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
|
||||
sectionOverrides: {
|
||||
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
|
||||
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
|
||||
@@ -594,7 +608,7 @@ describe("openai plugin", () => {
|
||||
agentId: undefined,
|
||||
}),
|
||||
).toEqual({
|
||||
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
|
||||
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
|
||||
sectionOverrides: {
|
||||
interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY,
|
||||
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
|
||||
|
||||
@@ -64,11 +64,20 @@ Do not use em dashes unless the user explicitly asks for them or they are requir
|
||||
|
||||
export const OPENAI_GPT5_EXECUTION_BIAS = `## Execution Bias
|
||||
|
||||
Start the real work in the same turn when the next step is clear.
|
||||
Use a real tool call or concrete action FIRST when the task is actionable. Do not stop at a plan or promise-to-act reply.
|
||||
Commentary-only turns are incomplete when tools are available and the next action is clear.
|
||||
If the work will take multiple steps, keep calling tools until the task is done or you hit a real blocker. Do not stop after one step to ask permission.
|
||||
Do prerequisite lookup or discovery before dependent actions.
|
||||
If another tool call would likely improve correctness or completeness, keep going instead of stopping at partial progress.
|
||||
Multi-part requests stay incomplete until every requested item is handled or clearly marked blocked.
|
||||
Before the final answer, quickly verify correctness, coverage, formatting, and obvious side effects.`;
|
||||
Act first, then verify if needed. Do not pause to summarize or verify before taking the next action.`;
|
||||
|
||||
export const OPENAI_GPT5_TOOL_CALL_STYLE = `## Tool Call Style
|
||||
|
||||
Call tools directly without narrating what you are about to do. Do not describe a plan before each tool call.
|
||||
When a first-class tool exists for an action, use the tool instead of asking the user to run a command.
|
||||
If multiple tool calls are needed, call them in sequence without stopping to explain between calls.
|
||||
Default: do not narrate routine, low-risk tool calls (just call the tool).
|
||||
Narrate only when it genuinely helps: complex multi-step work, sensitive actions like deletions, or when the user explicitly asks for commentary.`;
|
||||
|
||||
export type OpenAIPromptOverlayMode = "friendly" | "off";
|
||||
|
||||
@@ -103,8 +112,14 @@ export function resolveOpenAISystemPromptContribution(params: {
|
||||
) {
|
||||
return undefined;
|
||||
}
|
||||
// tool_call_style is NOT overridden via sectionOverrides because the
|
||||
// default section includes dynamic channel-specific approval guidance
|
||||
// from buildExecApprovalPromptGuidance() that varies per runtime
|
||||
// channel. Overriding it with a static string would lose that dynamic
|
||||
// content. Instead, the tool-first reinforcement lives in stablePrefix
|
||||
// so it's always present alongside the default tool_call_style section.
|
||||
return {
|
||||
stablePrefix: OPENAI_GPT5_OUTPUT_CONTRACT,
|
||||
stablePrefix: [OPENAI_GPT5_OUTPUT_CONTRACT, OPENAI_GPT5_TOOL_CALL_STYLE].join("\n\n"),
|
||||
sectionOverrides: {
|
||||
execution_bias: OPENAI_GPT5_EXECUTION_BIAS,
|
||||
...(params.mode === "friendly" ? { interaction_style: OPENAI_FRIENDLY_PROMPT_OVERLAY } : {}),
|
||||
|
||||
@@ -10,8 +10,10 @@ import {
|
||||
resetRunOverflowCompactionHarnessMocks,
|
||||
} from "./run.overflow-compaction.harness.js";
|
||||
import {
|
||||
buildAttemptReplayMetadata,
|
||||
extractPlanningOnlyPlanDetails,
|
||||
isLikelyExecutionAckPrompt,
|
||||
PLANNING_ONLY_RETRY_INSTRUCTION,
|
||||
resolveAckExecutionFastPathInstruction,
|
||||
resolvePlanningOnlyRetryLimit,
|
||||
resolvePlanningOnlyRetryInstruction,
|
||||
@@ -281,7 +283,10 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
|
||||
timedOut: false,
|
||||
attempt: makeAttemptResult({
|
||||
assistantTexts: ["I'll inspect the code, make the change, and run the checks."],
|
||||
toolMetas: [{ toolName: "bash", meta: "ls" }],
|
||||
toolMetas: [
|
||||
{ toolName: "read", meta: "path=src/index.ts" },
|
||||
{ toolName: "search", meta: "pattern=runEmbeddedPiAgent" },
|
||||
],
|
||||
}),
|
||||
});
|
||||
|
||||
@@ -436,3 +441,156 @@ describe("runEmbeddedPiAgent incomplete-turn safety", () => {
|
||||
).toBe("paused");
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolvePlanningOnlyRetryInstruction single-action loophole", () => {
|
||||
const openaiParams = { provider: "openai", modelId: "gpt-5.4" } as const;
|
||||
|
||||
function makeAttemptWithTools(
|
||||
toolNames: string[],
|
||||
assistantText: string,
|
||||
): Parameters<typeof resolvePlanningOnlyRetryInstruction>[0]["attempt"] {
|
||||
const toolMetas = toolNames.map((toolName) => ({ toolName }));
|
||||
return {
|
||||
toolMetas,
|
||||
assistantTexts: [assistantText],
|
||||
lastAssistant: { stopReason: "stop" },
|
||||
itemLifecycle: { startedCount: toolNames.length },
|
||||
replayMetadata: buildAttemptReplayMetadata({
|
||||
toolMetas,
|
||||
didSendViaMessagingTool: false,
|
||||
}),
|
||||
clientToolCall: null,
|
||||
yieldDetected: false,
|
||||
didSendDeterministicApprovalPrompt: false,
|
||||
didSendViaMessagingTool: false,
|
||||
lastToolError: null,
|
||||
} as unknown as Parameters<typeof resolvePlanningOnlyRetryInstruction>[0]["attempt"];
|
||||
}
|
||||
|
||||
it("retries when exactly 1 non-plan tool call plus 'i can do that' prose is detected", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(["read"], "I can do that next."),
|
||||
});
|
||||
|
||||
expect(result).toBe(PLANNING_ONLY_RETRY_INSTRUCTION);
|
||||
});
|
||||
|
||||
it("retries when exactly 1 non-plan tool call plus planning prose is detected", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(["read"], "I'll analyze the structure next."),
|
||||
});
|
||||
|
||||
expect(result).toBe(PLANNING_ONLY_RETRY_INSTRUCTION);
|
||||
});
|
||||
|
||||
it("does not retry when 2+ non-plan tool calls are present", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(["read", "search"], "I'll verify the output."),
|
||||
});
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not retry when 1 tool call plus completion language is present", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(["read"], "Done. The file looks correct."),
|
||||
});
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not retry when 1 tool call plus 'let me know' handoff is present", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(["read"], "Let me know if you need anything else."),
|
||||
});
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not retry when 1 tool call plus an answer-style summary is present", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(
|
||||
["read"],
|
||||
"I'll summarize the root cause: the provider auth scope is missing.",
|
||||
),
|
||||
});
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not retry when 1 tool call plus a future-tense description is present", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(
|
||||
["read"],
|
||||
"I'll describe the issue: the provider auth scope is missing.",
|
||||
),
|
||||
});
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not retry when 1 safe tool call is followed by answer prose joined with 'and'", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(["read"], "I'll explain and recommend a fix."),
|
||||
});
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not retry when 1 tool call plus a bare 'i can do that' reply is present", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(["read"], "I can do that."),
|
||||
});
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not retry when the lone tool call already had side effects", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(["sessions_spawn"], "I'll continue from there next."),
|
||||
});
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("does not retry when the lone tool call is unclassified", () => {
|
||||
const result = resolvePlanningOnlyRetryInstruction({
|
||||
...openaiParams,
|
||||
aborted: false,
|
||||
timedOut: false,
|
||||
attempt: makeAttemptWithTools(["vendor_widget"], "I'll continue from there next."),
|
||||
});
|
||||
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -54,6 +54,21 @@ const PLANNING_ONLY_COMPLETION_RE =
|
||||
/\b(?:done|finished|implemented|updated|fixed|changed|ran|verified|found|here(?:'s| is) what|blocked by|the blocker is)\b/i;
|
||||
const PLANNING_ONLY_HEADING_RE = /^(?:plan|steps?|next steps?)\s*:/i;
|
||||
const PLANNING_ONLY_BULLET_RE = /^(?:[-*•]\s+|\d+[.)]\s+)/u;
|
||||
const PLANNING_ONLY_MAX_VISIBLE_TEXT = 700;
|
||||
const SINGLE_ACTION_EXPLICIT_CONTINUATION_RE =
|
||||
/\b(?:going to|first[, ]+i(?:'ll| will)|next[, ]+i(?:'ll| will)|then[, ]+i(?:'ll| will)|i can do that next|let me (?!know\b)\w+(?:\s+\w+){0,3}\s+(?:next|then|first)\b)/i;
|
||||
const SINGLE_ACTION_MULTI_STEP_PROMISE_RE =
|
||||
/\bi(?:'ll| will)\b(?=[^.!?]{0,160}\b(?:next|then|after(?:wards)?|once)\b)/i;
|
||||
const SINGLE_ACTION_RESULT_STYLE_RE =
|
||||
/\b(?:i(?:'ll| will)\s+(?:summarize|explain|share|show|report|describe|clarify|answer|recap)(?:\s+\w+){0,4}\s*:|(?:here(?:'s| is)|summary|result|answer|findings?|root cause)\s*:)/i;
|
||||
const SINGLE_ACTION_RETRY_SAFE_TOOL_NAMES = new Set([
|
||||
"read",
|
||||
"search",
|
||||
"find",
|
||||
"grep",
|
||||
"glob",
|
||||
"ls",
|
||||
]);
|
||||
const DEFAULT_PLANNING_ONLY_RETRY_LIMIT = 1;
|
||||
const STRICT_AGENTIC_PLANNING_ONLY_RETRY_LIMIT = 2;
|
||||
const ACK_EXECUTION_NORMALIZED_SET = new Set([
|
||||
@@ -284,10 +299,51 @@ function countPlanOnlyToolMetas(toolMetas: PlanningOnlyAttempt["toolMetas"]): nu
|
||||
return toolMetas.filter((entry) => entry.toolName === "update_plan").length;
|
||||
}
|
||||
|
||||
function countNonPlanToolCalls(toolMetas: PlanningOnlyAttempt["toolMetas"]): number {
|
||||
return toolMetas.filter((entry) => entry.toolName !== "update_plan").length;
|
||||
}
|
||||
|
||||
function hasNonPlanToolActivity(toolMetas: PlanningOnlyAttempt["toolMetas"]): boolean {
|
||||
return toolMetas.some((entry) => entry.toolName !== "update_plan");
|
||||
}
|
||||
|
||||
function hasSingleRetrySafeNonPlanTool(toolMetas: PlanningOnlyAttempt["toolMetas"]): boolean {
|
||||
const nonPlanToolNames = toolMetas
|
||||
.map((entry) => normalizeLowercaseStringOrEmpty(entry.toolName))
|
||||
.filter((toolName) => toolName && toolName !== "update_plan");
|
||||
return (
|
||||
nonPlanToolNames.length === 1 &&
|
||||
SINGLE_ACTION_RETRY_SAFE_TOOL_NAMES.has(nonPlanToolNames[0] ?? "")
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Treat a turn with exactly one non-plan tool call plus visible "I'll do X
|
||||
* next" prose as effectively planning-only from the user's perspective. This
|
||||
* closes the one-action-then-narrative loophole without changing the 2+ tool
|
||||
* call path, which still counts as real multi-step progress.
|
||||
*/
|
||||
function isSingleActionThenNarrativePattern(params: {
|
||||
toolMetas: PlanningOnlyAttempt["toolMetas"];
|
||||
assistantTexts: readonly string[];
|
||||
}): boolean {
|
||||
const nonPlanCount = countNonPlanToolCalls(params.toolMetas);
|
||||
if (nonPlanCount !== 1) {
|
||||
return false;
|
||||
}
|
||||
const text = params.assistantTexts.join("\n\n").trim();
|
||||
if (!text || text.length > PLANNING_ONLY_MAX_VISIBLE_TEXT) {
|
||||
return false;
|
||||
}
|
||||
if (SINGLE_ACTION_RESULT_STYLE_RE.test(text)) {
|
||||
return false;
|
||||
}
|
||||
return (
|
||||
SINGLE_ACTION_EXPLICIT_CONTINUATION_RE.test(text) ||
|
||||
SINGLE_ACTION_MULTI_STEP_PROMISE_RE.test(text)
|
||||
);
|
||||
}
|
||||
|
||||
export function resolvePlanningOnlyRetryLimit(
|
||||
executionContract?: EmbeddedPiExecutionContract,
|
||||
): number {
|
||||
@@ -304,6 +360,12 @@ export function resolvePlanningOnlyRetryInstruction(params: {
|
||||
attempt: PlanningOnlyAttempt;
|
||||
}): string | null {
|
||||
const planOnlyToolMetaCount = countPlanOnlyToolMetas(params.attempt.toolMetas);
|
||||
const singleActionNarrative = isSingleActionThenNarrativePattern({
|
||||
toolMetas: params.attempt.toolMetas,
|
||||
assistantTexts: params.attempt.assistantTexts,
|
||||
});
|
||||
const allowSingleActionRetryBypass =
|
||||
singleActionNarrative && hasSingleRetrySafeNonPlanTool(params.attempt.toolMetas);
|
||||
if (
|
||||
!shouldApplyPlanningOnlyRetryGuard({
|
||||
provider: params.provider,
|
||||
@@ -316,8 +378,9 @@ export function resolvePlanningOnlyRetryInstruction(params: {
|
||||
params.attempt.didSendDeterministicApprovalPrompt ||
|
||||
params.attempt.didSendViaMessagingTool ||
|
||||
params.attempt.lastToolError ||
|
||||
hasNonPlanToolActivity(params.attempt.toolMetas) ||
|
||||
params.attempt.itemLifecycle.startedCount > planOnlyToolMetaCount ||
|
||||
(hasNonPlanToolActivity(params.attempt.toolMetas) && !allowSingleActionRetryBypass) ||
|
||||
(params.attempt.itemLifecycle.startedCount > planOnlyToolMetaCount &&
|
||||
!allowSingleActionRetryBypass) ||
|
||||
params.attempt.replayMetadata.hadPotentialSideEffects
|
||||
) {
|
||||
return null;
|
||||
@@ -329,7 +392,7 @@ export function resolvePlanningOnlyRetryInstruction(params: {
|
||||
}
|
||||
|
||||
const text = params.attempt.assistantTexts.join("\n\n").trim();
|
||||
if (!text || text.length > 700 || text.includes("```")) {
|
||||
if (!text || text.length > PLANNING_ONLY_MAX_VISIBLE_TEXT || text.includes("```")) {
|
||||
return null;
|
||||
}
|
||||
if (!PLANNING_ONLY_PROMISE_RE.test(text) && !hasStructuredPlanningOnlyFormat(text)) {
|
||||
|
||||
@@ -283,6 +283,71 @@ describe("handleToolExecutionEnd mutating failure recovery", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("marks successful subagents control actions as replay-invalid", async () => {
|
||||
const { ctx } = createTestContext();
|
||||
|
||||
await handleToolExecutionStart(
|
||||
ctx as never,
|
||||
{
|
||||
type: "tool_execution_start",
|
||||
toolName: "subagents",
|
||||
toolCallId: "tool-subagents-kill",
|
||||
args: {
|
||||
action: "kill",
|
||||
target: "worker-1",
|
||||
},
|
||||
} as never,
|
||||
);
|
||||
|
||||
await handleToolExecutionEnd(
|
||||
ctx as never,
|
||||
{
|
||||
type: "tool_execution_end",
|
||||
toolName: "subagents",
|
||||
toolCallId: "tool-subagents-kill",
|
||||
isError: false,
|
||||
result: { status: "ok", action: "kill", target: "worker-1" },
|
||||
} as never,
|
||||
);
|
||||
|
||||
expect(ctx.state.replayState).toEqual({
|
||||
replayInvalid: true,
|
||||
hadPotentialSideEffects: true,
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps read-only subagents list actions replay-safe", async () => {
|
||||
const { ctx } = createTestContext();
|
||||
|
||||
await handleToolExecutionStart(
|
||||
ctx as never,
|
||||
{
|
||||
type: "tool_execution_start",
|
||||
toolName: "subagents",
|
||||
toolCallId: "tool-subagents-list",
|
||||
args: {
|
||||
action: "list",
|
||||
},
|
||||
} as never,
|
||||
);
|
||||
|
||||
await handleToolExecutionEnd(
|
||||
ctx as never,
|
||||
{
|
||||
type: "tool_execution_end",
|
||||
toolName: "subagents",
|
||||
toolCallId: "tool-subagents-list",
|
||||
isError: false,
|
||||
result: { status: "ok", action: "list", total: 0, text: "no active subagents." },
|
||||
} as never,
|
||||
);
|
||||
|
||||
expect(ctx.state.replayState).toEqual({
|
||||
replayInvalid: false,
|
||||
hadPotentialSideEffects: false,
|
||||
});
|
||||
});
|
||||
|
||||
it("keeps successful mutating retries replay-invalid after an earlier tool failure", async () => {
|
||||
const { ctx } = createTestContext();
|
||||
|
||||
|
||||
@@ -58,6 +58,13 @@ describe("tool mutation helpers", () => {
|
||||
buildToolMutationState("message", { action: "send", to: "telegram:1" }).mutatingAction,
|
||||
).toBe(true);
|
||||
expect(buildToolMutationState("browser", { action: "list" }).mutatingAction).toBe(false);
|
||||
expect(
|
||||
buildToolMutationState("subagents", { action: "kill", target: "worker-1" }).mutatingAction,
|
||||
).toBe(true);
|
||||
expect(
|
||||
buildToolMutationState("subagents", { action: "steer", target: "worker-1" }).mutatingAction,
|
||||
).toBe(true);
|
||||
expect(buildToolMutationState("subagents", { action: "list" }).mutatingAction).toBe(false);
|
||||
});
|
||||
|
||||
it("matches tool actions by fingerprint and fails closed on asymmetric data", () => {
|
||||
@@ -82,6 +89,7 @@ describe("tool mutation helpers", () => {
|
||||
});
|
||||
|
||||
it("keeps legacy name-only mutating heuristics for payload fallback", () => {
|
||||
expect(isLikelyMutatingToolName("sessions_spawn")).toBe(true);
|
||||
expect(isLikelyMutatingToolName("sessions_send")).toBe(true);
|
||||
expect(isLikelyMutatingToolName("browser_actions")).toBe(true);
|
||||
expect(isLikelyMutatingToolName("message_slack")).toBe(true);
|
||||
|
||||
@@ -12,6 +12,7 @@ const MUTATING_TOOL_NAMES = new Set([
|
||||
"bash",
|
||||
"process",
|
||||
"message",
|
||||
"sessions_spawn",
|
||||
"sessions_send",
|
||||
"cron",
|
||||
"gateway",
|
||||
@@ -129,6 +130,8 @@ export function isMutatingToolCall(toolName: string, args: unknown): boolean {
|
||||
typeof record?.content === "string" ||
|
||||
typeof record?.message === "string"
|
||||
);
|
||||
case "subagents":
|
||||
return action === "kill" || action === "steer";
|
||||
case "session_status":
|
||||
return typeof record?.model === "string" && record.model.trim().length > 0;
|
||||
default: {
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import { applyChannelDoctorCompatibilityMigrations } from "./channel-legacy-config-migrate.js";
|
||||
import { beforeAll, describe, expect, it, vi } from "vitest";
|
||||
|
||||
const applyPluginDoctorCompatibilityMigrations = vi.hoisted(() => vi.fn());
|
||||
|
||||
@@ -8,6 +7,16 @@ vi.mock("../../../plugins/doctor-contract-registry.js", () => ({
|
||||
applyPluginDoctorCompatibilityMigrations(...args),
|
||||
}));
|
||||
|
||||
let applyChannelDoctorCompatibilityMigrations: typeof import("./channel-legacy-config-migrate.js").applyChannelDoctorCompatibilityMigrations;
|
||||
|
||||
beforeAll(async () => {
|
||||
// Commands runs on the shared non-isolated worker, so reload after installing
|
||||
// this file's mock to avoid inheriting a cached real registry import.
|
||||
vi.resetModules();
|
||||
({ applyChannelDoctorCompatibilityMigrations } =
|
||||
await import("./channel-legacy-config-migrate.js"));
|
||||
});
|
||||
|
||||
describe("bundled channel legacy config migrations", () => {
|
||||
it("normalizes legacy private-network aliases exposed through bundled contract surfaces", () => {
|
||||
applyPluginDoctorCompatibilityMigrations.mockReturnValueOnce({
|
||||
|
||||
Reference in New Issue
Block a user