From bb55e23c67b87f01ccc4c85f302a7cceadf20b44 Mon Sep 17 00:00:00 2001
From: Peter Steinberger
Date: Thu, 23 Apr 2026 06:51:23 +0100
Subject: [PATCH] test(e2e): cover OpenAI web search minimal reasoning

---
 docs/help/testing.md | 1 +
 package.json | 1 +
 .../e2e/openai-web-search-minimal-docker.sh | 461 ++++++++++++++++++
 scripts/test-docker-all.sh | 1 +
 4 files changed, 464 insertions(+)
 create mode 100755 scripts/e2e/openai-web-search-minimal-docker.sh

diff --git a/docs/help/testing.md b/docs/help/testing.md
index 0032ddb2cf6..3ca36b3e008 100644
--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -927,6 +927,7 @@ The live-model Docker runners also bind-mount only the needed CLI auth homes (or
 - Onboarding wizard (TTY, full scaffolding): `pnpm test:docker:onboard` (script: `scripts/e2e/onboard-docker.sh`)
 - Npm tarball onboarding/channel/agent smoke: `pnpm test:docker:npm-onboard-channel-agent` installs the packed OpenClaw tarball globally in Docker, configures OpenAI via env-ref onboarding plus Telegram by default, verifies enabling the plugin installs its runtime deps on demand, runs doctor, and runs one mocked OpenAI agent turn. Reuse a prebuilt tarball with `OPENCLAW_NPM_ONBOARD_PACKAGE_TGZ=/path/to/openclaw-*.tgz`, skip the host rebuild with `OPENCLAW_NPM_ONBOARD_HOST_BUILD=0`, or switch channel with `OPENCLAW_NPM_ONBOARD_CHANNEL=discord`.
 - Gateway networking (two containers, WS auth + health): `pnpm test:docker:gateway-network` (script: `scripts/e2e/gateway-network-docker.sh`)
+- OpenAI Responses web_search minimal reasoning regression: `pnpm test:docker:openai-web-search-minimal` (script: `scripts/e2e/openai-web-search-minimal-docker.sh`) runs a mocked OpenAI server through Gateway, verifies that a request with `web_search` raises `reasoning.effort` from `minimal` to `low`, then forces a provider schema rejection and checks that the raw provider error detail appears in the Gateway logs.
- MCP channel bridge (seeded Gateway + stdio bridge + raw Claude notification-frame smoke): `pnpm test:docker:mcp-channels` (script: `scripts/e2e/mcp-channels-docker.sh`) - Pi bundle MCP tools (real stdio MCP server + embedded Pi profile allow/deny smoke): `pnpm test:docker:pi-bundle-mcp-tools` (script: `scripts/e2e/pi-bundle-mcp-tools-docker.sh`) - Cron/subagent MCP cleanup (real Gateway + stdio MCP child teardown after isolated cron and one-shot subagent runs): `pnpm test:docker:cron-mcp-cleanup` (script: `scripts/e2e/cron-mcp-cleanup-docker.sh`) diff --git a/package.json b/package.json index b52e1c7c553..c74e18b4745 100644 --- a/package.json +++ b/package.json @@ -1447,6 +1447,7 @@ "test:docker:mcp-channels": "bash scripts/e2e/mcp-channels-docker.sh", "test:docker:npm-onboard-channel-agent": "bash scripts/e2e/npm-onboard-channel-agent-docker.sh", "test:docker:onboard": "bash scripts/e2e/onboard-docker.sh", + "test:docker:openai-web-search-minimal": "bash scripts/e2e/openai-web-search-minimal-docker.sh", "test:docker:openwebui": "bash scripts/e2e/openwebui-docker.sh", "test:docker:pi-bundle-mcp-tools": "bash scripts/e2e/pi-bundle-mcp-tools-docker.sh", "test:docker:plugin-update": "bash scripts/e2e/plugin-update-unchanged-docker.sh", diff --git a/scripts/e2e/openai-web-search-minimal-docker.sh b/scripts/e2e/openai-web-search-minimal-docker.sh new file mode 100755 index 00000000000..bd727fcef5b --- /dev/null +++ b/scripts/e2e/openai-web-search-minimal-docker.sh @@ -0,0 +1,461 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +source "$ROOT_DIR/scripts/lib/docker-e2e-image.sh" + +IMAGE_NAME="$(docker_e2e_resolve_image "openclaw-openai-web-search-minimal-e2e" OPENCLAW_OPENAI_WEB_SEARCH_MINIMAL_E2E_IMAGE)" +SKIP_BUILD="${OPENCLAW_OPENAI_WEB_SEARCH_MINIMAL_E2E_SKIP_BUILD:-0}" +PORT="18789" +MOCK_PORT="19191" +TOKEN="openai-web-search-minimal-e2e-$$" + +docker_e2e_build_or_reuse "$IMAGE_NAME" openai-web-search-minimal "$ROOT_DIR/scripts/e2e/Dockerfile" "$ROOT_DIR" "" "$SKIP_BUILD" + +echo "Running OpenAI web_search minimal reasoning Docker E2E..." +run_logged openai-web-search-minimal docker run --rm \ + -e "OPENCLAW_GATEWAY_TOKEN=$TOKEN" \ + -e "OPENAI_API_KEY=sk-openclaw-web-search-minimal-e2e" \ + -e "BRAVE_API_KEY=brave-openclaw-web-search-minimal-e2e" \ + -e "PORT=$PORT" \ + -e "MOCK_PORT=$MOCK_PORT" \ + -i "$IMAGE_NAME" bash -s <<'EOF' +set -euo pipefail + +export HOME="$(mktemp -d "/tmp/openclaw-openai-web-search-minimal.XXXXXX")" +export OPENCLAW_STATE_DIR="$HOME/.openclaw" +export OPENCLAW_SKIP_CHANNELS=1 +export OPENCLAW_SKIP_GMAIL_WATCHER=1 +export OPENCLAW_SKIP_CRON=1 +export OPENCLAW_SKIP_CANVAS_HOST=1 + +PORT="${PORT:?missing PORT}" +MOCK_PORT="${MOCK_PORT:?missing MOCK_PORT}" +TOKEN="${OPENCLAW_GATEWAY_TOKEN:?missing OPENCLAW_GATEWAY_TOKEN}" +SUCCESS_MARKER="OPENCLAW_SCHEMA_E2E_OK" +RAW_SCHEMA_ERROR="400 The following tools cannot be used with reasoning.effort 'minimal': web_search." 
+MOCK_REQUEST_LOG="/tmp/openclaw-openai-web-search-minimal-requests.jsonl" +GATEWAY_LOG="/tmp/openclaw-openai-web-search-minimal-gateway.log" +mock_pid="" +gateway_pid="" + +cleanup() { + if [ -n "${gateway_pid:-}" ] && kill -0 "$gateway_pid" 2>/dev/null; then + kill "$gateway_pid" 2>/dev/null || true + wait "$gateway_pid" 2>/dev/null || true + fi + if [ -n "${mock_pid:-}" ] && kill -0 "$mock_pid" 2>/dev/null; then + kill "$mock_pid" 2>/dev/null || true + wait "$mock_pid" 2>/dev/null || true + fi +} +trap cleanup EXIT + +dump_debug_logs() { + local status="$1" + echo "OpenAI web_search minimal Docker E2E failed with exit code $status" >&2 + for file in \ + "$GATEWAY_LOG" \ + /tmp/openclaw-openai-web-search-minimal-mock.log \ + /tmp/openclaw-openai-web-search-minimal-client-success.log \ + /tmp/openclaw-openai-web-search-minimal-client-reject.log \ + "$MOCK_REQUEST_LOG" \ + "$OPENCLAW_STATE_DIR/openclaw.json"; do + if [ -f "$file" ]; then + echo "--- $file ---" >&2 + sed -n '1,260p' "$file" >&2 || true + fi + done +} +trap 'status=$?; dump_debug_logs "$status"; exit "$status"' ERR + +entry=dist/index.mjs +[ -f "$entry" ] || entry=dist/index.js +mkdir -p "$OPENCLAW_STATE_DIR" + +cat >"$OPENCLAW_STATE_DIR/openclaw.json" </tmp/openclaw-openai-web-search-minimal-mock.mjs <<'NODE' +import http from "node:http"; +import fs from "node:fs"; + +const port = Number(process.env.MOCK_PORT); +const requestLog = process.env.MOCK_REQUEST_LOG; +const successMarker = process.env.SUCCESS_MARKER; +const rawSchemaError = process.env.RAW_SCHEMA_ERROR; + +function readBody(req) { + return new Promise((resolve, reject) => { + let body = ""; + req.setEncoding("utf8"); + req.on("data", (chunk) => { + body += chunk; + }); + req.on("end", () => resolve(body)); + req.on("error", reject); + }); +} + +function writeJson(res, status, body) { + res.writeHead(status, { "content-type": "application/json" }); + res.end(JSON.stringify(body)); +} + +function writeOpenAiReject(res) { + writeJson(res, 
400, { + error: { + message: rawSchemaError.replace(/^400\s+/, ""), + type: "invalid_request_error", + code: "invalid_request_error", + }, + }); +} + +function hasWebSearchTool(tools) { + return Array.isArray(tools) && tools.some((tool) => { + if (!tool || typeof tool !== "object") return false; + if (tool.type === "web_search") return true; + if (tool.type === "function" && tool.name === "web_search") return true; + if (tool.type === "function" && tool.function?.name === "web_search") return true; + return false; + }); +} + +function bodyContainsForceReject(body) { + return JSON.stringify(body).includes("FORCE_SCHEMA_REJECT"); +} + +function responseEvents(text) { + return [ + { + type: "response.output_item.added", + item: { + type: "message", + id: "msg_schema_e2e_1", + role: "assistant", + content: [], + status: "in_progress", + }, + }, + { + type: "response.output_item.done", + item: { + type: "message", + id: "msg_schema_e2e_1", + role: "assistant", + status: "completed", + content: [{ type: "output_text", text, annotations: [] }], + }, + }, + { + type: "response.completed", + response: { + id: "resp_schema_e2e_1", + status: "completed", + usage: { + input_tokens: 11, + output_tokens: 7, + total_tokens: 18, + input_tokens_details: { cached_tokens: 0 }, + }, + }, + }, + ]; +} + +function writeSse(res, events) { + res.writeHead(200, { + "content-type": "text/event-stream", + "cache-control": "no-store", + connection: "keep-alive", + }); + for (const event of events) { + res.write(`data: ${JSON.stringify(event)}\n\n`); + } + res.write("data: [DONE]\n\n"); + res.end(); +} + +const server = http.createServer(async (req, res) => { + const url = new URL(req.url ?? 
"/", "http://127.0.0.1"); + if (req.method === "GET" && url.pathname === "/health") { + writeJson(res, 200, { ok: true }); + return; + } + if (req.method === "GET" && url.pathname === "/v1/models") { + writeJson(res, 200, { + object: "list", + data: [{ id: "gpt-5", object: "model", owned_by: "openclaw-e2e" }], + }); + return; + } + + const bodyText = await readBody(req); + let body = {}; + try { + body = bodyText ? JSON.parse(bodyText) : {}; + } catch { + body = {}; + } + fs.appendFileSync(requestLog, `${JSON.stringify({ method: req.method, path: url.pathname, body })}\n`); + + if (req.method === "POST" && url.pathname === "/v1/responses") { + if (bodyContainsForceReject(body)) { + writeOpenAiReject(res); + return; + } + if (body?.reasoning?.effort === "minimal" && hasWebSearchTool(body.tools)) { + writeOpenAiReject(res); + return; + } + writeSse(res, responseEvents(successMarker)); + return; + } + + writeJson(res, 404, { error: { message: `unhandled mock route: ${req.method} ${url.pathname}` } }); +}); + +server.listen(port, "127.0.0.1", () => { + console.log(`mock-openai listening on ${port}`); +}); +NODE + +MOCK_PORT="$MOCK_PORT" \ +MOCK_REQUEST_LOG="$MOCK_REQUEST_LOG" \ +SUCCESS_MARKER="$SUCCESS_MARKER" \ +RAW_SCHEMA_ERROR="$RAW_SCHEMA_ERROR" \ +node /tmp/openclaw-openai-web-search-minimal-mock.mjs >/tmp/openclaw-openai-web-search-minimal-mock.log 2>&1 & +mock_pid="$!" + +for _ in $(seq 1 80); do + if node -e "fetch('http://127.0.0.1:${MOCK_PORT}/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))" >/dev/null 2>&1; then + break + fi + sleep 0.1 +done +node -e "fetch('http://127.0.0.1:${MOCK_PORT}/health').then(r=>process.exit(r.ok?0:1)).catch(()=>process.exit(1))" >/dev/null + +node "$entry" gateway --port "$PORT" --bind lan --allow-unconfigured >"$GATEWAY_LOG" 2>&1 & +gateway_pid="$!" +for _ in $(seq 1 360); do + if ! 
kill -0 "$gateway_pid" 2>/dev/null; then + echo "gateway exited before listening" >&2 + exit 1 + fi + if node --input-type=module -e " + import net from 'node:net'; + const socket = net.createConnection({ host: '127.0.0.1', port: Number(process.env.PORT) }); + const timeout = setTimeout(() => { socket.destroy(); process.exit(1); }, 400); + socket.on('connect', () => { clearTimeout(timeout); socket.end(); process.exit(0); }); + socket.on('error', () => { clearTimeout(timeout); process.exit(1); }); + " >/dev/null 2>&1; then + break + fi + sleep 0.25 +done +node --input-type=module -e " + import net from 'node:net'; + const socket = net.createConnection({ host: '127.0.0.1', port: Number(process.env.PORT) }); + const timeout = setTimeout(() => { socket.destroy(); process.exit(1); }, 1000); + socket.on('connect', () => { clearTimeout(timeout); socket.end(); process.exit(0); }); + socket.on('error', () => { clearTimeout(timeout); process.exit(1); }); +" >/dev/null + +cat >/tmp/openclaw-openai-web-search-minimal-client.mjs <<'NODE' +const PROTOCOL_VERSION = 3; +const port = process.env.PORT; +const token = process.env.OPENCLAW_GATEWAY_TOKEN; +const mode = process.argv[2]; +const message = + mode === "reject" + ? "FORCE_SCHEMA_REJECT" + : "Return exactly OPENCLAW_SCHEMA_E2E_OK."; +const id = mode === "reject" ? 
"schema-reject" : "schema-success"; + +if (!port || !token) throw new Error("missing PORT/OPENCLAW_GATEWAY_TOKEN"); + +const ws = new WebSocket(`ws://127.0.0.1:${port}`); +await new Promise((resolve, reject) => { + const t = setTimeout(() => reject(new Error("ws open timeout")), 5000); + ws.addEventListener("open", () => { + clearTimeout(t); + resolve(); + }, { once: true }); +}); + +function onceFrame(filter, timeoutMs = 30000) { + return new Promise((resolve, reject) => { + const t = setTimeout(() => reject(new Error("timeout waiting for frame")), timeoutMs); + const handler = (event) => { + const obj = JSON.parse(String(event.data)); + if (!filter(obj)) return; + clearTimeout(t); + ws.removeEventListener("message", handler); + resolve(obj); + }; + ws.addEventListener("message", handler); + }); +} + +ws.send(JSON.stringify({ + type: "req", + id: "connect", + method: "connect", + params: { + minProtocol: PROTOCOL_VERSION, + maxProtocol: PROTOCOL_VERSION, + client: { + id: "gateway-client", + displayName: `openai-web-search-minimal-${mode}`, + version: "dev", + platform: process.platform, + mode: "backend", + }, + role: "operator", + scopes: ["operator.read", "operator.write", "operator.admin"], + caps: ["tool-events"], + auth: { token }, + }, +})); +const connectRes = await onceFrame((o) => o?.type === "res" && o?.id === "connect"); +if (!connectRes.ok) throw new Error(`connect failed: ${connectRes.error?.message ?? "unknown"}`); + +ws.send(JSON.stringify({ + type: "req", + id, + method: "chat.send", + params: { + sessionKey: "agent:main:main", + message, + thinking: "minimal", + deliver: false, + timeoutMs: 30000, + idempotencyKey: id, + }, +})); +const sendRes = await onceFrame((o) => o?.type === "res" && o?.id === id); +if (!sendRes.ok) throw new Error(`chat.send failed: ${sendRes.error?.message ?? 
"unknown"}`); + +if (mode === "reject") { + ws.close(); + process.exit(0); +} + +const terminal = await onceFrame( + (o) => + o?.type === "event" && + o?.event === "chat" && + o?.payload?.runId === id && + (o?.payload?.state === "final" || o?.payload?.state === "error"), + 45000, +); +ws.close(); + +if (mode === "success" && terminal.payload?.state !== "final") { + throw new Error(`expected final success event, got ${JSON.stringify(terminal)}`); +} +NODE + +PORT="$PORT" OPENCLAW_GATEWAY_TOKEN="$TOKEN" node /tmp/openclaw-openai-web-search-minimal-client.mjs success >/tmp/openclaw-openai-web-search-minimal-client-success.log 2>&1 + +node - "$MOCK_REQUEST_LOG" <<'NODE' +const fs = require("node:fs"); +const logPath = process.argv[2]; +const entries = fs.readFileSync(logPath, "utf8").trim().split(/\n+/).filter(Boolean).map((line) => JSON.parse(line)); +const responseEntries = entries.filter((entry) => entry.path === "/v1/responses"); +if (responseEntries.length < 1) { + throw new Error(`mock OpenAI /v1/responses was not used. Requests: ${JSON.stringify(entries)}`); +} +const success = responseEntries.find((entry) => JSON.stringify(entry.body).includes("OPENCLAW_SCHEMA_E2E_OK")); +if (!success) { + throw new Error(`missing success request. Requests: ${JSON.stringify(responseEntries)}`); +} +const tools = Array.isArray(success.body.tools) ? success.body.tools : []; +const hasWebSearch = tools.some((tool) => tool?.type === "web_search" || (tool?.type === "function" && (tool?.name === "web_search" || tool?.function?.name === "web_search"))); +if (!hasWebSearch) { + throw new Error(`success request did not include web_search. 
Body: ${JSON.stringify(success.body)}`); +} +if (success.body.reasoning?.effort !== "low") { + throw new Error(`expected reasoning.effort low with web_search, got ${JSON.stringify(success.body.reasoning)}`); +} +NODE + +PORT="$PORT" OPENCLAW_GATEWAY_TOKEN="$TOKEN" node /tmp/openclaw-openai-web-search-minimal-client.mjs reject >/tmp/openclaw-openai-web-search-minimal-client-reject.log 2>&1 + +for _ in $(seq 1 80); do + if grep -Fq "$RAW_SCHEMA_ERROR" "$GATEWAY_LOG"; then + break + fi + sleep 0.25 +done +grep -F "$RAW_SCHEMA_ERROR" "$GATEWAY_LOG" >/dev/null + +echo "OpenAI web_search minimal reasoning Docker E2E passed" +EOF diff --git a/scripts/test-docker-all.sh b/scripts/test-docker-all.sh index 9dfd2c016cb..7145ea4c10d 100644 --- a/scripts/test-docker-all.sh +++ b/scripts/test-docker-all.sh @@ -15,6 +15,7 @@ OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:openwebui OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:onboard OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:npm-onboard-channel-agent OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:gateway-network +OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:openai-web-search-minimal OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:mcp-channels OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:pi-bundle-mcp-tools OPENCLAW_SKIP_DOCKER_BUILD=1 pnpm test:docker:cron-mcp-cleanup