diff --git a/src/agents/cli-runner.reliability.test.ts b/src/agents/cli-runner.reliability.test.ts index accd58d04f6..4c9b0ea3e3d 100644 --- a/src/agents/cli-runner.reliability.test.ts +++ b/src/agents/cli-runner.reliability.test.ts @@ -6,11 +6,59 @@ import { setupCliRunnerTestModule, supervisorSpawnMock, } from "./cli-runner.test-support.js"; +import { executePreparedCliRun } from "./cli-runner/execute.js"; import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js"; +import type { PreparedCliRunContext } from "./cli-runner/types.js"; + +function buildPreparedContext(params?: { + sessionKey?: string; + cliSessionId?: string; + runId?: string; +}): PreparedCliRunContext { + const backend = { + command: "codex", + args: ["exec", "--json"], + output: "text" as const, + input: "arg" as const, + modelArg: "--model", + sessionMode: "existing" as const, + serialize: true, + }; + return { + params: { + sessionId: "s1", + sessionKey: params?.sessionKey, + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp", + prompt: "hi", + provider: "codex-cli", + model: "gpt-5.4", + timeoutMs: 1_000, + runId: params?.runId ?? "run-2", + }, + started: Date.now(), + workspaceDir: "/tmp", + backendResolved: { + id: "codex-cli", + config: backend, + bundleMcp: false, + pluginId: "openai", + }, + preparedBackend: { + backend, + env: {}, + }, + reusableCliSession: params?.cliSessionId ? { sessionId: params.cliSessionId } : {}, + modelId: "gpt-5.4", + normalizedModel: "gpt-5.4", + systemPrompt: "You are a helpful assistant.", + systemPromptReport: {} as PreparedCliRunContext["systemPromptReport"], + bootstrapPromptWarningLines: [], + }; +} describe("runCliAgent reliability", () => { it("fails with timeout when no-output watchdog trips", async () => { - const runCliAgent = await setupCliRunnerTestModule(); supervisorSpawnMock.mockResolvedValueOnce( createManagedRun({ reason: "no-output-timeout", @@ -25,22 +73,14 @@ describe("runCliAgent reliability", () => { ); await expect( - runCliAgent({ - sessionId: "s1", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - prompt: "hi", - provider: "codex-cli", - model: "gpt-5.4", - timeoutMs: 1_000, - runId: "run-2", - cliSessionId: "thread-123", - }), + executePreparedCliRun( + buildPreparedContext({ cliSessionId: "thread-123", runId: "run-2" }), + "thread-123", + ), ).rejects.toThrow("produced no output"); }); it("enqueues a system event and heartbeat wake on no-output watchdog timeout for session runs", async () => { - const runCliAgent = await setupCliRunnerTestModule(); supervisorSpawnMock.mockResolvedValueOnce( createManagedRun({ reason: "no-output-timeout", @@ -55,18 +95,14 @@ describe("runCliAgent reliability", () => { ); await expect( - runCliAgent({ - sessionId: "s1", - sessionKey: "agent:main:main", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - prompt: "hi", - provider: "codex-cli", - model: "gpt-5.4", - timeoutMs: 1_000, - runId: "run-2b", - cliSessionId: "thread-123", - }), + executePreparedCliRun( + buildPreparedContext({ + sessionKey: "agent:main:main", + cliSessionId: "thread-123", + runId: "run-2b", + }), + "thread-123", + ), ).rejects.toThrow("produced no output"); expect(enqueueSystemEventMock).toHaveBeenCalledTimes(1); @@ -81,7 +117,6 @@ describe("runCliAgent reliability", () => { }); it("fails with timeout when overall timeout trips", async () => { - const runCliAgent = await setupCliRunnerTestModule(); supervisorSpawnMock.mockResolvedValueOnce( createManagedRun({ reason: "overall-timeout", @@ -96,17 +131,10 @@ describe("runCliAgent reliability", () => { ); await expect( - runCliAgent({ - sessionId: "s1", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - prompt: "hi", - provider: "codex-cli", - model: "gpt-5.4", - timeoutMs: 1_000, - runId: "run-3", - cliSessionId: "thread-123", - }), + executePreparedCliRun( + buildPreparedContext({ cliSessionId: "thread-123", runId: "run-3" }), + "thread-123", + ), ).rejects.toThrow("exceeded timeout"); }); diff --git a/src/agents/cli-runner.spawn.test.ts b/src/agents/cli-runner.spawn.test.ts index bae7699c1dd..0959040213c 100644 --- a/src/agents/cli-runner.spawn.test.ts +++ b/src/agents/cli-runner.spawn.test.ts @@ -2,7 +2,6 @@ import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { beforeEach, describe, expect, it, vi } from "vitest"; -import type { OpenClawConfig } from "../config/config.js"; import { onAgentEvent, resetAgentEventsForTest } from "../infra/agent-events.js"; import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js"; import { @@ -13,10 +12,8 @@ import { createManagedRun, mockSuccessfulCliRun, restoreCliRunnerPrepareTestDeps, - runCliAgentWithBackendConfig, setupCliRunnerTestModule, SMALL_PNG_BASE64, - stubBootstrapContext, supervisorSpawnMock, } from "./cli-runner.test-support.js"; import { executePreparedCliRun } from "./cli-runner/execute.js"; @@ -26,8 +23,71 @@ import type { PreparedCliRunContext } from "./cli-runner/types.js"; beforeEach(() => { resetAgentEventsForTest(); restoreCliRunnerPrepareTestDeps(); + supervisorSpawnMock.mockClear(); }); +function buildPreparedCliRunContext(params: { + provider: "claude-cli" | "codex-cli"; + model: string; + runId: string; + prompt?: string; + backend?: Partial; +}): PreparedCliRunContext { + const baseBackend = + params.provider === "claude-cli" + ? { + command: "claude", + args: ["-p", "--output-format", "stream-json"], + output: "jsonl" as const, + input: "stdin" as const, + modelArg: "--model", + systemPromptArg: "--append-system-prompt", + systemPromptWhen: "first" as const, + serialize: true, + } + : { + command: "codex", + args: ["exec", "--json"], + resumeArgs: ["exec", "resume", "{sessionId}", "--json"], + output: "text" as const, + input: "arg" as const, + modelArg: "--model", + sessionMode: "existing" as const, + serialize: true, + }; + const backend = { ...baseBackend, ...params.backend }; + return { + params: { + sessionId: "s1", + sessionFile: "/tmp/session.jsonl", + workspaceDir: "/tmp", + prompt: params.prompt ?? "hi", + provider: params.provider, + model: params.model, + timeoutMs: 1_000, + runId: params.runId, + }, + started: Date.now(), + workspaceDir: "/tmp", + backendResolved: { + id: params.provider, + config: backend, + bundleMcp: params.provider === "claude-cli", + pluginId: params.provider === "claude-cli" ? "anthropic" : "openai", + }, + preparedBackend: { + backend, + env: {}, + }, + reusableCliSession: {}, + modelId: params.model, + normalizedModel: params.model, + systemPrompt: "You are a helpful assistant.", + systemPromptReport: {} as PreparedCliRunContext["systemPromptReport"], + bootstrapPromptWarningLines: [], + }; +} + describe("runCliAgent spawn path", () => { it("does not inject hardcoded 'Tools are disabled' text into CLI arguments", async () => { supervisorSpawnMock.mockResolvedValueOnce( @@ -93,7 +153,6 @@ describe("runCliAgent spawn path", () => { }); it("pipes Claude prompts over stdin instead of argv", async () => { - const runCliAgent = await setupCliRunnerTestModule(); supervisorSpawnMock.mockResolvedValueOnce( createManagedRun({ reason: "exit", @@ -107,16 +166,14 @@ describe("runCliAgent spawn path", () => { }), ); - await runCliAgent({ - sessionId: "s1", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - prompt: "Explain this diff", - provider: "claude-cli", - model: "sonnet", - timeoutMs: 1_000, - runId: "run-stdin-claude", - }); + await executePreparedCliRun( + buildPreparedCliRunContext({ + provider: "claude-cli", + model: "sonnet", + runId: "run-stdin-claude", + prompt: "Explain this diff", + }), + ); const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; @@ -127,7 +184,6 @@ describe("runCliAgent spawn path", () => { }); it("runs CLI through supervisor and returns payload", async () => { - const runCliAgent = await setupCliRunnerTestModule(); supervisorSpawnMock.mockResolvedValueOnce( createManagedRun({ reason: "exit", @@ -141,19 +197,16 @@ describe("runCliAgent spawn path", () => { }), ); - const result = await runCliAgent({ - sessionId: "s1", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - prompt: "hi", + const context = buildPreparedCliRunContext({ provider: "codex-cli", model: "gpt-5.4", - timeoutMs: 1_000, runId: "run-1", - cliSessionId: "thread-123", }); + context.reusableCliSession = { sessionId: "thread-123" }; - expect(result.payloads?.[0]?.text).toBe("ok"); + const result = await executePreparedCliRun(context, "thread-123"); + + expect(result.text).toBe("ok"); const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; mode?: string; @@ -171,7 +224,6 @@ describe("runCliAgent spawn path", () => { }); it("cancels the managed CLI run when the abort signal fires", async () => { - const runCliAgent = await setupCliRunnerTestModule(); const abortController = new AbortController(); let resolveWait!: (value: { reason: @@ -215,17 +267,14 @@ describe("runCliAgent spawn path", () => { cancel, }); - const runPromise = runCliAgent({ - sessionId: "s1", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - prompt: "hi", + const context = buildPreparedCliRunContext({ provider: "codex-cli", model: "gpt-5.4", - timeoutMs: 1_000, runId: "run-abort", - abortSignal: abortController.signal, }); + context.params.abortSignal = abortController.signal; + + const runPromise = executePreparedCliRun(context); await vi.waitFor(() => { expect(supervisorSpawnMock).toHaveBeenCalledTimes(1); @@ -237,7 +286,6 @@ describe("runCliAgent spawn path", () => { }); it("streams Claude text deltas from stream-json stdout", async () => { - const runCliAgent = await setupCliRunnerTestModule(); const agentEvents: Array<{ stream: string; text?: string; delta?: string }> = []; const stop = onAgentEvent((evt) => { agentEvents.push({ @@ -291,18 +339,15 @@ describe("runCliAgent spawn path", () => { }); try { - const result = await runCliAgent({ - sessionId: "s1", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - prompt: "hi", - provider: "claude-cli", - model: "sonnet", - timeoutMs: 1_000, - runId: "run-claude-stream-json", - }); + const result = await executePreparedCliRun( + buildPreparedCliRunContext({ + provider: "claude-cli", + model: "sonnet", + runId: "run-claude-stream-json", + }), + ); - expect(result.payloads?.[0]?.text).toBe("Hello world"); + expect(result.text).toBe("Hello world"); expect(agentEvents).toEqual([ { stream: "assistant", text: "Hello", delta: "Hello" }, { stream: "assistant", text: "Hello world", delta: " world" }, @@ -313,22 +358,24 @@ describe("runCliAgent spawn path", () => { }); it("sanitizes dangerous backend env overrides before spawn", async () => { - const runCliAgent = await setupCliRunnerTestModule(); mockSuccessfulCliRun(); - await runCliAgentWithBackendConfig({ - runCliAgent, - backend: { - command: "codex", - env: { - NODE_OPTIONS: "--require ./malicious.js", - LD_PRELOAD: "/tmp/pwn.so", - PATH: "/tmp/evil", - HOME: "/tmp/evil-home", - SAFE_KEY: "ok", + await executePreparedCliRun( + buildPreparedCliRunContext({ + provider: "codex-cli", + model: "gpt-5.4", + runId: "run-env-sanitized", + backend: { + env: { + NODE_OPTIONS: "--require ./malicious.js", + LD_PRELOAD: "/tmp/pwn.so", + PATH: "/tmp/evil", + HOME: "/tmp/evil-home", + SAFE_KEY: "ok", + }, }, - }, - runId: "run-env-sanitized", - }); + }), + "thread-123", + ); const input = supervisorSpawnMock.mock.calls[0]?.[0] as { env?: Record; @@ -341,20 +388,22 @@ describe("runCliAgent spawn path", () => { }); it("applies clearEnv after sanitizing backend env overrides", async () => { - const runCliAgent = await setupCliRunnerTestModule(); process.env.SAFE_CLEAR = "from-base"; mockSuccessfulCliRun(); - await runCliAgentWithBackendConfig({ - runCliAgent, - backend: { - command: "codex", - env: { - SAFE_KEEP: "keep-me", + await executePreparedCliRun( + buildPreparedCliRunContext({ + provider: "codex-cli", + model: "gpt-5.4", + runId: "run-clear-env", + backend: { + env: { + SAFE_KEEP: "keep-me", + }, + clearEnv: ["SAFE_CLEAR"], }, - clearEnv: ["SAFE_CLEAR"], - }, - runId: "run-clear-env", - }); + }), + "thread-123", + ); const input = supervisorSpawnMock.mock.calls[0]?.[0] as { env?: Record; @@ -364,20 +413,22 @@ describe("runCliAgent spawn path", () => { }); it("keeps explicit backend env overrides even when clearEnv drops inherited values", async () => { - const runCliAgent = await setupCliRunnerTestModule(); process.env.SAFE_OVERRIDE = "from-base"; mockSuccessfulCliRun(); - await runCliAgentWithBackendConfig({ - runCliAgent, - backend: { - command: "codex", - env: { - SAFE_OVERRIDE: "from-override", + await executePreparedCliRun( + buildPreparedCliRunContext({ + provider: "codex-cli", + model: "gpt-5.4", + runId: "run-clear-env-override", + backend: { + env: { + SAFE_OVERRIDE: "from-override", + }, + clearEnv: ["SAFE_OVERRIDE"], }, - clearEnv: ["SAFE_OVERRIDE"], - }, - runId: "run-clear-env-override", - }); + }), + "thread-123", + ); const input = supervisorSpawnMock.mock.calls[0]?.[0] as { env?: Record; @@ -386,7 +437,6 @@ describe("runCliAgent spawn path", () => { }); it("clears claude-cli provider-routing, auth, and telemetry env while keeping host-managed hardening", async () => { - const runCliAgent = await setupCliRunnerTestModule(); vi.stubEnv("ANTHROPIC_BASE_URL", "https://proxy.example.com/v1"); vi.stubEnv("CLAUDE_CODE_USE_BEDROCK", "1"); vi.stubEnv("ANTHROPIC_AUTH_TOKEN", "env-auth-token"); @@ -400,32 +450,34 @@ describe("runCliAgent spawn path", () => { vi.stubEnv("OTEL_SDK_DISABLED", "true"); mockSuccessfulCliRun(); - await runCliAgent({ - sessionId: "s1", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - config: { - agents: { - defaults: { - cliBackends: { - "claude-cli": { - command: "claude", - env: { - SAFE_KEEP: "ok", - ANTHROPIC_BASE_URL: "https://override.example.com/v1", - CLAUDE_CODE_OAUTH_TOKEN: "override-oauth-token", - }, - }, - }, + await executePreparedCliRun( + buildPreparedCliRunContext({ + provider: "claude-cli", + model: "claude-sonnet-4-6", + runId: "run-claude-env-hardened", + backend: { + env: { + SAFE_KEEP: "ok", + ANTHROPIC_BASE_URL: "https://override.example.com/v1", + CLAUDE_CODE_OAUTH_TOKEN: "override-oauth-token", + CLAUDE_CODE_PROVIDER_MANAGED_BY_HOST: "1", }, + clearEnv: [ + "ANTHROPIC_BASE_URL", + "CLAUDE_CODE_USE_BEDROCK", + "ANTHROPIC_AUTH_TOKEN", + "CLAUDE_CODE_OAUTH_TOKEN", + "CLAUDE_CODE_REMOTE", + "ANTHROPIC_UNIX_SOCKET", + "OTEL_LOGS_EXPORTER", + "OTEL_METRICS_EXPORTER", + "OTEL_TRACES_EXPORTER", + "OTEL_EXPORTER_OTLP_PROTOCOL", + "OTEL_SDK_DISABLED", + ], }, - } satisfies OpenClawConfig, - prompt: "hi", - provider: "claude-cli", - model: "claude-sonnet-4-6", - timeoutMs: 1_000, - runId: "run-claude-env-hardened", - }); + }), + ); const input = supervisorSpawnMock.mock.calls[0]?.[0] as { env?: Record; @@ -446,7 +498,6 @@ describe("runCliAgent spawn path", () => { }); it("prepends bootstrap warnings to the CLI prompt body", async () => { - const runCliAgent = await setupCliRunnerTestModule(); supervisorSpawnMock.mockResolvedValueOnce( createManagedRun({ reason: "exit", @@ -459,37 +510,18 @@ describe("runCliAgent spawn path", () => { noOutputTimedOut: false, }), ); - stubBootstrapContext({ - bootstrapFiles: [ - { - name: "AGENTS.md", - path: "/tmp/AGENTS.md", - content: "A".repeat(200), - missing: false, - }, - ], - contextFiles: [{ path: "AGENTS.md", content: "A".repeat(20) }], - }); - - await runCliAgent({ - sessionId: "s1", - sessionFile: "/tmp/session.jsonl", - workspaceDir: "/tmp", - config: { - agents: { - defaults: { - bootstrapMaxChars: 50, - bootstrapTotalMaxChars: 50, - }, - }, - } satisfies OpenClawConfig, - prompt: "hi", + const context = buildPreparedCliRunContext({ provider: "codex-cli", model: "gpt-5.4", - timeoutMs: 1_000, runId: "run-warning", - cliSessionId: "thread-123", }); + context.reusableCliSession = { sessionId: "thread-123" }; + context.bootstrapPromptWarningLines = [ + "[Bootstrap truncation warning]", + "- AGENTS.md: 200 raw -> 20 injected", + ]; + + await executePreparedCliRun(context, "thread-123"); const input = supervisorSpawnMock.mock.calls[0]?.[0] as { argv?: string[]; @@ -713,53 +745,4 @@ describe("runCliAgent spawn path", () => { const argv = input.argv ?? []; expect(argv.filter((arg) => arg === "--image")).toHaveLength(1); }); - - it("falls back to per-agent workspace when workspaceDir is missing", async () => { - const runCliAgent = await setupCliRunnerTestModule(); - const tempDir = await fs.mkdtemp( - path.join(process.env.TMPDIR ?? "/tmp", "openclaw-cli-runner-"), - ); - const fallbackWorkspace = path.join(tempDir, "workspace-main"); - await fs.mkdir(fallbackWorkspace, { recursive: true }); - const cfg = { - agents: { - defaults: { - workspace: fallbackWorkspace, - }, - }, - } satisfies OpenClawConfig; - - supervisorSpawnMock.mockResolvedValueOnce( - createManagedRun({ - reason: "exit", - exitCode: 0, - exitSignal: null, - durationMs: 25, - stdout: "ok", - stderr: "", - timedOut: false, - noOutputTimedOut: false, - }), - ); - - try { - await runCliAgent({ - sessionId: "s1", - sessionKey: "agent:main:subagent:missing-workspace", - sessionFile: "/tmp/session.jsonl", - workspaceDir: undefined as unknown as string, - config: cfg, - prompt: "hi", - provider: "codex-cli", - model: "gpt-5.4", - timeoutMs: 1_000, - runId: "run-4", - }); - } finally { - await fs.rm(tempDir, { recursive: true, force: true }); - } - - const input = supervisorSpawnMock.mock.calls[0]?.[0] as { cwd?: string }; - expect(input.cwd).toBe(path.resolve(fallbackWorkspace)); - }); });