From b68b4b9151ba394efcdfff77a601fd1e84eed7bb Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 26 Apr 2026 22:27:41 +0100 Subject: [PATCH] ci: add targeted docker lane reruns --- .../openclaw-live-and-e2e-checks-reusable.yml | 184 +++++++++++++++++- docs/ci.md | 2 +- scripts/docker/install-sh-e2e/run.sh | 20 +- scripts/test-docker-all.mjs | 122 ++++++++++-- 4 files changed, 297 insertions(+), 31 deletions(-) diff --git a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml index eeada4f02d2..04f9914115c 100644 --- a/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml +++ b/.github/workflows/openclaw-live-and-e2e-checks-reusable.yml @@ -23,6 +23,11 @@ on: required: false default: true type: boolean + docker_lanes: + description: Comma/space separated Docker scheduler lane names to run against the prepared image + required: false + default: "" + type: string include_live_suites: description: Whether to run live-provider coverage required: false @@ -54,6 +59,11 @@ on: required: false default: true type: boolean + docker_lanes: + description: Comma/space separated Docker scheduler lane names to run against the prepared image + required: false + default: "" + type: string include_live_suites: description: Whether to run live-provider coverage required: false @@ -363,7 +373,7 @@ jobs: validate_docker_e2e: needs: [validate_selected_ref, prepare_docker_e2e_image] - if: inputs.include_release_path_suites + if: inputs.include_release_path_suites && inputs.docker_lanes == '' name: Docker E2E (${{ matrix.label }}) runs-on: blacksmith-32vcpu-ubuntu-2404 timeout-minutes: ${{ matrix.timeout_minutes }} @@ -517,11 +527,12 @@ jobs: console.log(""); console.log(`Status: \`${summary.status}\``); console.log(""); - console.log("| Lane | Status | Seconds | Timed out |"); - console.log("| --- | ---: | ---: | --- |"); + console.log("| Lane | Status | Seconds | Timed out | Rerun |"); + console.log("| 
--- | ---: | ---: | --- | --- |"); for (const lane of lanes) { const status = lane.status === 0 ? "pass" : `fail ${lane.status}`; - console.log(`| \`${lane.name}\` | ${status} | ${lane.elapsedSeconds ?? ""} | ${lane.timedOut ? "yes" : "no"} |`); + const rerun = String(lane.rerunCommand ?? "").replaceAll("`", "\\`"); + console.log(`| \`${lane.name}\` | ${status} | ${lane.elapsedSeconds ?? ""} | ${lane.timedOut ? "yes" : "no"} | \`${rerun}\` |`); } NODE @@ -533,9 +544,170 @@ jobs: path: .artifacts/docker-tests/ if-no-files-found: ignore + validate_docker_lanes: + needs: [validate_selected_ref, prepare_docker_e2e_image] + if: inputs.docker_lanes != '' + name: Docker E2E targeted lanes + runs-on: blacksmith-32vcpu-ubuntu-2404 + timeout-minutes: 180 + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }} + ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + ANTHROPIC_API_TOKEN: ${{ secrets.ANTHROPIC_API_TOKEN }} + ANTHROPIC_API_KEY_OLD: ${{ secrets.ANTHROPIC_API_KEY_OLD }} + BYTEPLUS_API_KEY: ${{ secrets.BYTEPLUS_API_KEY }} + CEREBRAS_API_KEY: ${{ secrets.CEREBRAS_API_KEY }} + DASHSCOPE_API_KEY: ${{ secrets.DASHSCOPE_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} + KIMI_API_KEY: ${{ secrets.KIMI_API_KEY }} + MODELSTUDIO_API_KEY: ${{ secrets.MODELSTUDIO_API_KEY }} + MOONSHOT_API_KEY: ${{ secrets.MOONSHOT_API_KEY }} + MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} + MINIMAX_API_KEY: ${{ secrets.MINIMAX_API_KEY }} + OPENCODE_API_KEY: ${{ secrets.OPENCODE_API_KEY }} + OPENCODE_ZEN_API_KEY: ${{ secrets.OPENCODE_ZEN_API_KEY }} + OPENCLAW_LIVE_BROWSER_CDP_URL: ${{ secrets.OPENCLAW_LIVE_BROWSER_CDP_URL }} + OPENCLAW_LIVE_SETUP_TOKEN: ${{ secrets.OPENCLAW_LIVE_SETUP_TOKEN }} + OPENCLAW_LIVE_SETUP_TOKEN_MODEL: ${{ secrets.OPENCLAW_LIVE_SETUP_TOKEN_MODEL }} + OPENCLAW_LIVE_SETUP_TOKEN_PROFILE: ${{ secrets.OPENCLAW_LIVE_SETUP_TOKEN_PROFILE }} + OPENCLAW_LIVE_SETUP_TOKEN_VALUE: ${{ secrets.OPENCLAW_LIVE_SETUP_TOKEN_VALUE }} 
+ GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} + QWEN_API_KEY: ${{ secrets.QWEN_API_KEY }} + FAL_KEY: ${{ secrets.FAL_KEY }} + RUNWAY_API_KEY: ${{ secrets.RUNWAY_API_KEY }} + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }} + VYDRA_API_KEY: ${{ secrets.VYDRA_API_KEY }} + XAI_API_KEY: ${{ secrets.XAI_API_KEY }} + ZAI_API_KEY: ${{ secrets.ZAI_API_KEY }} + Z_AI_API_KEY: ${{ secrets.Z_AI_API_KEY }} + BYTEPLUS_ACCESS_KEY_ID: ${{ secrets.BYTEPLUS_ACCESS_KEY_ID }} + BYTEPLUS_SECRET_ACCESS_KEY: ${{ secrets.BYTEPLUS_SECRET_ACCESS_KEY }} + CLAUDE_CODE_OAUTH_TOKEN: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }} + OPENCLAW_CODEX_AUTH_JSON: ${{ secrets.OPENCLAW_CODEX_AUTH_JSON }} + OPENCLAW_CODEX_CONFIG_TOML: ${{ secrets.OPENCLAW_CODEX_CONFIG_TOML }} + OPENCLAW_CLAUDE_JSON: ${{ secrets.OPENCLAW_CLAUDE_JSON }} + OPENCLAW_CLAUDE_CREDENTIALS_JSON: ${{ secrets.OPENCLAW_CLAUDE_CREDENTIALS_JSON }} + OPENCLAW_CLAUDE_SETTINGS_JSON: ${{ secrets.OPENCLAW_CLAUDE_SETTINGS_JSON }} + OPENCLAW_CLAUDE_SETTINGS_LOCAL_JSON: ${{ secrets.OPENCLAW_CLAUDE_SETTINGS_LOCAL_JSON }} + OPENCLAW_GEMINI_SETTINGS_JSON: ${{ secrets.OPENCLAW_GEMINI_SETTINGS_JSON }} + FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }} + OPENCLAW_DOCKER_E2E_IMAGE: ${{ needs.prepare_docker_e2e_image.outputs.image }} + OPENCLAW_SKIP_DOCKER_BUILD: "1" + INCLUDE_OPENWEBUI: ${{ inputs.include_openwebui }} + DOCKER_E2E_LANES: ${{ inputs.docker_lanes }} + steps: + - name: Checkout selected ref + uses: actions/checkout@v6 + with: + ref: ${{ needs.validate_selected_ref.outputs.selected_sha }} + fetch-depth: 1 + + - name: Log in to GHCR for shared Docker E2E image + uses: docker/login-action@4907a6ddec9925e35a0a9e82d7399ccc52663121 # v4 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ github.token }} + + - name: Setup Node environment + uses: 
./.github/actions/setup-node-env + with: + node-version: ${{ env.NODE_VERSION }} + pnpm-version: ${{ env.PNPM_VERSION }} + install-bun: "true" + + - name: Hydrate live auth/profile inputs + run: bash scripts/ci-hydrate-live-auth.sh + + - name: Pull shared Docker E2E image + shell: bash + run: | + set -euo pipefail + docker pull "${OPENCLAW_DOCKER_E2E_IMAGE}" + + - name: Validate targeted lane credentials + shell: bash + run: | + set -euo pipefail + lanes=" ${DOCKER_E2E_LANES//,/ } " + if [[ "$lanes" == *" install-e2e "* ]]; then + [[ -n "${OPENAI_API_KEY:-}" ]] || { + echo "OPENAI_API_KEY is required for installer Docker E2E." >&2 + exit 1 + } + if [[ -z "${ANTHROPIC_API_TOKEN:-}" && -z "${ANTHROPIC_API_KEY:-}" ]]; then + echo "ANTHROPIC_API_TOKEN or ANTHROPIC_API_KEY is required for installer Docker E2E." >&2 + exit 1 + fi + fi + if [[ "$lanes" == *" openwebui "* || "$lanes" == *" openai-web-search-minimal "* ]]; then + [[ -n "${OPENAI_API_KEY:-}" ]] || { + echo "OPENAI_API_KEY is required for selected OpenAI Docker lanes." >&2 + exit 1 + } + fi + + - name: Run targeted Docker E2E lanes + shell: bash + run: | + set -euo pipefail + lanes=" ${DOCKER_E2E_LANES//,/ } " + export OPENCLAW_DOCKER_ALL_LANES="${DOCKER_E2E_LANES}" + export OPENCLAW_DOCKER_ALL_PREFLIGHT=0 + export OPENCLAW_DOCKER_ALL_FAIL_FAST=0 + export OPENCLAW_DOCKER_ALL_INCLUDE_OPENWEBUI="${INCLUDE_OPENWEBUI}" + export OPENCLAW_DOCKER_ALL_LOG_DIR=".artifacts/docker-tests/targeted" + export OPENCLAW_DOCKER_ALL_TIMINGS_FILE=".artifacts/docker-tests/targeted-timings.json" + if [[ "$lanes" == *" live-"* ]]; then + export OPENCLAW_DOCKER_ALL_BUILD=1 + else + export OPENCLAW_DOCKER_ALL_BUILD=0 + fi + + pnpm test:docker:all + + - name: Summarize targeted Docker E2E lanes + if: always() + shell: bash + run: | + set -euo pipefail + summary=".artifacts/docker-tests/targeted/summary.json" + if [[ ! 
-f "$summary" ]]; then + echo "Docker targeted summary missing: \`$summary\`" >> "$GITHUB_STEP_SUMMARY" + exit 0 + fi + node --input-type=module - "$summary" <<'NODE' >> "$GITHUB_STEP_SUMMARY" + import fs from "node:fs"; + const summary = JSON.parse(fs.readFileSync(process.argv[2], "utf8")); + const lanes = Array.isArray(summary.lanes) ? summary.lanes : []; + console.log("### Docker E2E targeted lanes"); + console.log(""); + console.log(`Status: \`${summary.status}\``); + console.log(""); + console.log("| Lane | Status | Seconds | Timed out | Rerun |"); + console.log("| --- | ---: | ---: | --- | --- |"); + for (const lane of lanes) { + const status = lane.status === 0 ? "pass" : `fail ${lane.status}`; + const rerun = String(lane.rerunCommand ?? "").replaceAll("`", "\\`"); + console.log(`| \`${lane.name}\` | ${status} | ${lane.elapsedSeconds ?? ""} | ${lane.timedOut ? "yes" : "no"} | \`${rerun}\` |`); + } + NODE + + - name: Upload targeted Docker E2E artifacts + if: always() + uses: actions/upload-artifact@v7 + with: + name: docker-e2e-targeted + path: .artifacts/docker-tests/ + if-no-files-found: ignore + validate_docker_openwebui: needs: [validate_selected_ref, prepare_docker_e2e_image] - if: inputs.include_openwebui && !inputs.include_release_path_suites + if: inputs.include_openwebui && !inputs.include_release_path_suites && inputs.docker_lanes == '' runs-on: blacksmith-32vcpu-ubuntu-2404 timeout-minutes: 75 env: @@ -578,7 +750,7 @@ jobs: prepare_docker_e2e_image: needs: validate_selected_ref - if: inputs.include_release_path_suites || inputs.include_openwebui + if: inputs.include_release_path_suites || inputs.include_openwebui || inputs.docker_lanes != '' runs-on: blacksmith-32vcpu-ubuntu-2404 timeout-minutes: 90 permissions: diff --git a/docs/ci.md b/docs/ci.md index a9bf09656ee..4cdbb3508e7 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -92,7 +92,7 @@ Scope logic lives in `scripts/ci-changed-scope.mjs` and is covered by unit tests CI workflow edits validate the 
Node CI graph plus workflow linting, but do not force Windows, Android, or macOS native builds by themselves; those platform lanes stay scoped to platform source changes. CI routing-only edits, selected cheap core-test fixture edits, and narrow plugin contract helper/test-routing edits use a fast Node-only manifest path: preflight, security, and a single `checks-fast-core` task. That path avoids build artifacts, Node 22 compatibility, channel contracts, full core shards, bundled-plugin shards, and additional guard matrices when the changed files are limited to the routing or helper surfaces that the fast task exercises directly. Windows Node checks are scoped to Windows-specific process/path wrappers, npm/pnpm/UI runner helpers, package manager config, and the CI workflow surfaces that execute that lane; unrelated source, plugin, install-smoke, and test-only changes stay on the Linux Node lanes so they do not reserve a 16-vCPU Windows worker for coverage that is already exercised by the normal test shards. -The separate `install-smoke` workflow reuses the same scope script through its own `preflight` job. It splits smoke coverage into `run_fast_install_smoke` and `run_full_install_smoke`. Pull requests run the fast path for Docker/package surfaces, bundled plugin package/manifest changes, and core plugin/channel/gateway/Plugin SDK surfaces that the Docker smoke jobs exercise. Source-only bundled plugin changes, test-only edits, and docs-only edits do not reserve Docker workers. The fast path builds the root Dockerfile image once, checks the CLI, runs the agents delete shared-workspace CLI smoke, runs the container gateway-network e2e, verifies a bundled extension build arg, and runs the bounded bundled-plugin Docker profile under a 240-second aggregate command timeout with each scenario's Docker run capped separately. 
The full path keeps QR package install and installer Docker/update coverage for nightly scheduled runs, manual dispatches, workflow-call release checks, and pull requests that truly touch installer/package/Docker surfaces. `main` pushes, including merge commits, do not force the full path; when changed-scope logic would request full coverage on a push, the workflow keeps the fast Docker smoke and leaves the full install smoke to nightly or release validation. The slow Bun global install image-provider smoke is separately gated by `run_bun_global_install_smoke`; it runs on the nightly schedule and from the release checks workflow, and manual `install-smoke` dispatches can opt into it, but pull requests and `main` pushes do not run it. QR and installer Docker tests keep their own install-focused Dockerfiles. Local `test:docker:all` prebuilds one shared live-test image and one shared `scripts/e2e/Dockerfile` built-app image, then runs the live/E2E smoke lanes with a weighted scheduler and `OPENCLAW_SKIP_DOCKER_BUILD=1`; tune the default main-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_PARALLELISM` and the provider-sensitive tail-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM`. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=6`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=8`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7` so npm install and multi-service lanes do not overcommit Docker while lighter lanes still fill available slots. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=0` or another millisecond value. The local aggregate preflights Docker, removes stale OpenClaw E2E containers, emits active-lane status, persists lane timings for longest-first ordering, and supports `OPENCLAW_DOCKER_ALL_DRY_RUN=1` for scheduler inspection. 
It stops scheduling new pooled lanes after the first failure by default, and each lane has a 120-minute fallback timeout overrideable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. The reusable live/E2E workflow builds and pushes one SHA-tagged GHCR Docker E2E image, then runs the release-path Docker suite as at most three chunked jobs with `OPENCLAW_SKIP_DOCKER_BUILD=1` so each chunk pulls the shared image once and executes multiple lanes through the same weighted scheduler (`OPENCLAW_DOCKER_ALL_PROFILE=release-path`, `OPENCLAW_DOCKER_ALL_CHUNK=core|package-update|plugins-integrations`). Each chunk uploads `.artifacts/docker-tests/` with lane logs, timings, and `summary.json`. When Open WebUI is requested with the release-path suite, it runs inside the plugins/integrations chunk instead of reserving a fourth Docker worker; Open WebUI keeps a standalone job only for openwebui-only dispatches. The scheduled live/E2E workflow runs the full release-path Docker suite daily. The bundled update matrix is split by update target so repeated npm update and doctor repair passes can shard with other bundled checks. +The separate `install-smoke` workflow reuses the same scope script through its own `preflight` job. It splits smoke coverage into `run_fast_install_smoke` and `run_full_install_smoke`. Pull requests run the fast path for Docker/package surfaces, bundled plugin package/manifest changes, and core plugin/channel/gateway/Plugin SDK surfaces that the Docker smoke jobs exercise. Source-only bundled plugin changes, test-only edits, and docs-only edits do not reserve Docker workers. The fast path builds the root Dockerfile image once, checks the CLI, runs the agents delete shared-workspace CLI smoke, runs the container gateway-network e2e, verifies a bundled extension build arg, and runs the bounded bundled-plugin Docker profile under a 240-second aggregate command timeout with each scenario's Docker run capped separately. 
The full path keeps QR package install and installer Docker/update coverage for nightly scheduled runs, manual dispatches, workflow-call release checks, and pull requests that truly touch installer/package/Docker surfaces. `main` pushes, including merge commits, do not force the full path; when changed-scope logic would request full coverage on a push, the workflow keeps the fast Docker smoke and leaves the full install smoke to nightly or release validation. The slow Bun global install image-provider smoke is separately gated by `run_bun_global_install_smoke`; it runs on the nightly schedule and from the release checks workflow, and manual `install-smoke` dispatches can opt into it, but pull requests and `main` pushes do not run it. QR and installer Docker tests keep their own install-focused Dockerfiles. Local `test:docker:all` prebuilds one shared live-test image and one shared `scripts/e2e/Dockerfile` built-app image, then runs the live/E2E smoke lanes with a weighted scheduler and `OPENCLAW_SKIP_DOCKER_BUILD=1`; tune the default main-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_PARALLELISM` and the provider-sensitive tail-pool slot count of 10 with `OPENCLAW_DOCKER_ALL_TAIL_PARALLELISM`. Heavy lane caps default to `OPENCLAW_DOCKER_ALL_LIVE_LIMIT=6`, `OPENCLAW_DOCKER_ALL_NPM_LIMIT=8`, and `OPENCLAW_DOCKER_ALL_SERVICE_LIMIT=7` so npm install and multi-service lanes do not overcommit Docker while lighter lanes still fill available slots. Lane starts are staggered by 2 seconds by default to avoid local Docker daemon create storms; override with `OPENCLAW_DOCKER_ALL_START_STAGGER_MS=0` or another millisecond value. The local aggregate preflights Docker, removes stale OpenClaw E2E containers, emits active-lane status, persists lane timings for longest-first ordering, and supports `OPENCLAW_DOCKER_ALL_DRY_RUN=1` for scheduler inspection. 
It stops scheduling new pooled lanes after the first failure by default, and each lane has a 120-minute fallback timeout overridable with `OPENCLAW_DOCKER_ALL_LANE_TIMEOUT_MS`; selected live/tail lanes use tighter per-lane caps. `OPENCLAW_DOCKER_ALL_LANES=<lane[,lane...]>` (comma- or space-separated, exact lane names) runs only the named scheduler lanes, including release-only lanes such as `install-e2e` and split bundled update lanes such as `bundled-channel-update-acpx`, while skipping the cleanup smoke so agents can reproduce one failed lane. The reusable live/E2E workflow builds and pushes one SHA-tagged GHCR Docker E2E image, then runs the release-path Docker suite as at most three chunked jobs with `OPENCLAW_SKIP_DOCKER_BUILD=1` so each chunk pulls the shared image once and executes multiple lanes through the same weighted scheduler (`OPENCLAW_DOCKER_ALL_PROFILE=release-path`, `OPENCLAW_DOCKER_ALL_CHUNK=core|package-update|plugins-integrations`). Each chunk uploads `.artifacts/docker-tests/` with lane logs, timings, `summary.json`, and per-lane rerun commands. The workflow `docker_lanes` input runs selected lanes against the prepared image instead of the three chunk jobs, which keeps failed-lane debugging bounded to one targeted Docker job; if a selected lane is a live Docker lane, the targeted job builds the live-test image locally for that rerun. When Open WebUI is requested with the release-path suite, it runs inside the plugins/integrations chunk instead of reserving a fourth Docker worker; Open WebUI keeps a standalone job only for openwebui-only dispatches. The scheduled live/E2E workflow runs the full release-path Docker suite daily. The bundled update matrix is split by update target so repeated npm update and doctor repair passes can shard with other bundled checks. Local changed-lane logic lives in `scripts/changed-lanes.mjs` and is executed by `scripts/check-changed.mjs`.
That local gate is stricter about architecture boundaries than the broad CI platform scope: core production changes run core prod typecheck plus core tests, core test-only changes run only core test typecheck/tests, extension production changes run extension prod typecheck plus extension tests, and extension test-only changes run only extension test typecheck/tests. Public Plugin SDK or plugin-contract changes expand to extension validation because extensions depend on those core contracts. Release metadata-only version bumps run targeted version/config/root-dependency checks. Unknown root/config changes fail safe to all lanes. diff --git a/scripts/docker/install-sh-e2e/run.sh b/scripts/docker/install-sh-e2e/run.sh index 383722d6c35..ecc8af74cc5 100755 --- a/scripts/docker/install-sh-e2e/run.sh +++ b/scripts/docker/install-sh-e2e/run.sh @@ -544,6 +544,14 @@ run_profile() { } trap cleanup_profile EXIT + TURN1_JSON="/tmp/agent-${profile}-1.json" + TURN2_JSON="/tmp/agent-${profile}-2.json" + TURN2B_JSON="/tmp/agent-${profile}-2b.json" + TURN3_JSON="/tmp/agent-${profile}-3.json" + TURN3B_JSON="/tmp/agent-${profile}-3b.json" + TURN4_JSON="/tmp/agent-${profile}-4.json" + HEALTH_JSON="/tmp/health-${profile}.json" + echo "==> Wait for health ($profile)" for _ in $(seq 1 240); do if openclaw --profile "$profile" health --timeout 5000 --json >/dev/null 2>&1; then @@ -551,15 +559,13 @@ run_profile() { fi sleep 0.25 done - openclaw --profile "$profile" health --timeout 60000 --json >/dev/null + if ! 
openclaw --profile "$profile" health --timeout 60000 --json >"$HEALTH_JSON" 2>&1; then + echo "ERROR: gateway health failed ($profile, output=$HEALTH_JSON)" >&2 + dump_profile_debug "$profile" "$HEALTH_JSON" >&2 || true + return 1 + fi echo "==> Agent turns ($profile)" - TURN1_JSON="/tmp/agent-${profile}-1.json" - TURN2_JSON="/tmp/agent-${profile}-2.json" - TURN2B_JSON="/tmp/agent-${profile}-2b.json" - TURN3_JSON="/tmp/agent-${profile}-3.json" - TURN3B_JSON="/tmp/agent-${profile}-3b.json" - TURN4_JSON="/tmp/agent-${profile}-4.json" run_agent_turn "$profile" "$SESSION_ID" \ "Use the read tool (not exec) to read ${PROOF_TXT}. Reply with the exact contents only (no extra whitespace)." \ diff --git a/scripts/test-docker-all.mjs b/scripts/test-docker-all.mjs index 9372b5dd092..842a59223b8 100644 --- a/scripts/test-docker-all.mjs +++ b/scripts/test-docker-all.mjs @@ -458,6 +458,51 @@ function releasePathChunkLanes(chunk, options = {}) { ]; } +function allReleasePathLanes(options = {}) { + return Object.keys(releasePathChunks).flatMap((chunk) => + releasePathChunkLanes(chunk, { + includeOpenWebUI: chunk === "plugins-integrations" && options.includeOpenWebUI, + }), + ); +} + +function parseLaneSelection(raw) { + if (!raw) { + return []; + } + return [ + ...new Set( + String(raw) + .split(/[,\s]+/u) + .map((token) => token.trim()) + .filter(Boolean), + ), + ]; +} + +function dedupeLanes(poolLanes) { + const byName = new Map(); + for (const poolLane of poolLanes) { + if (!byName.has(poolLane.name)) { + byName.set(poolLane.name, poolLane); + } + } + return [...byName.values()]; +} + +function selectNamedLanes(poolLanes, selectedNames, label) { + const byName = new Map(poolLanes.map((poolLane) => [poolLane.name, poolLane])); + const missing = selectedNames.filter((name) => !byName.has(name)); + if (missing.length > 0) { + throw new Error( + `${label} unknown lane(s): ${missing.join(", ")}. 
Available lanes: ${[...byName.keys()] + .toSorted((a, b) => a.localeCompare(b)) + .join(", ")}`, + ); + } + return selectedNames.map((name) => byName.get(name)); +} + function parsePositiveInt(raw, fallback, label) { if (!raw) { return fallback; @@ -599,6 +644,18 @@ function shellQuote(value) { return `'${String(value).replaceAll("'", "'\\''")}'`; } +function buildLaneRerunCommand(name, baseEnv) { + const build = name.startsWith("live-") ? "1" : "0"; + const env = [ + ["OPENCLAW_DOCKER_ALL_LANES", name], + ["OPENCLAW_DOCKER_ALL_BUILD", build], + ["OPENCLAW_DOCKER_ALL_PREFLIGHT", "0"], + ["OPENCLAW_SKIP_DOCKER_BUILD", "1"], + ["OPENCLAW_DOCKER_E2E_IMAGE", baseEnv.OPENCLAW_DOCKER_E2E_IMAGE || DEFAULT_E2E_IMAGE], + ]; + return `${env.map(([key, value]) => `${key}=${shellQuote(value)}`).join(" ")} pnpm test:docker:all`; +} + function timingSeconds(timingStore, poolLane) { const fromStore = timingStore?.lanes?.[poolLane.name]?.durationSeconds; if (typeof fromStore === "number" && Number.isFinite(fromStore) && fromStore > 0) { @@ -985,6 +1042,7 @@ async function runLane(lane, baseEnv, logDir, fallbackTimeoutMs) { logFile, name, elapsedSeconds, + rerunCommand: buildLaneRerunCommand(name, baseEnv), status: result.status, timedOut: result.timedOut, }; @@ -1244,6 +1302,12 @@ async function main() { process.env.OPENCLAW_DOCKER_ALL_INCLUDE_OPENWEBUI ?? 
process.env.INCLUDE_OPENWEBUI, true, ); + const selectedLaneNamesRaw = + process.env.OPENCLAW_DOCKER_ALL_LANES || process.env.DOCKER_E2E_LANES || ""; + const selectedLaneNames = parseLaneSelection(selectedLaneNamesRaw); + if (selectedLaneNamesRaw && selectedLaneNames.length === 0) { + throw new Error("OPENCLAW_DOCKER_ALL_LANES must include at least one lane name"); + } const liveMode = parseLiveMode(process.env.OPENCLAW_DOCKER_ALL_LIVE_MODE); const liveRetries = parseNonNegativeInt( process.env.OPENCLAW_DOCKER_ALL_LIVE_RETRIES, @@ -1271,19 +1335,34 @@ async function main() { const retriedMainLanes = applyLiveRetries(lanes, liveRetries); const retriedTailLanes = applyLiveRetries(tailLanes, liveRetries); const releaseLanes = - profile === RELEASE_PATH_PROFILE + selectedLaneNames.length === 0 && profile === RELEASE_PATH_PROFILE ? releasePathChunkLanes(releaseChunk, { includeOpenWebUI }) : undefined; - const configuredLanes = releaseLanes - ? releaseLanes - : liveMode === "only" - ? applyLiveMode([...retriedMainLanes, ...retriedTailLanes], liveMode) - : applyLiveMode(retriedMainLanes, liveMode); - const configuredTailLanes = releaseLanes - ? [] - : liveMode === "only" + const selectedLanes = + selectedLaneNames.length > 0 + ? selectNamedLanes( + dedupeLanes([ + ...allReleasePathLanes({ includeOpenWebUI }), + ...retriedMainLanes, + ...retriedTailLanes, + ]), + selectedLaneNames, + "OPENCLAW_DOCKER_ALL_LANES", + ) + : undefined; + const configuredLanes = selectedLanes + ? selectedLanes + : releaseLanes + ? releaseLanes + : liveMode === "only" + ? applyLiveMode([...retriedMainLanes, ...retriedTailLanes], liveMode) + : applyLiveMode(retriedMainLanes, liveMode); + const configuredTailLanes = + selectedLanes || releaseLanes ? [] - : applyLiveMode(retriedTailLanes, liveMode); + : liveMode === "only" + ? 
[] + : applyLiveMode(retriedTailLanes, liveMode); const orderedLanes = orderLanes(configuredLanes, timingStore); const orderedTailLanes = orderLanes(configuredTailLanes, timingStore); @@ -1307,6 +1386,9 @@ async function main() { if (profile === RELEASE_PATH_PROFILE) { console.log(`==> Include Open WebUI: ${includeOpenWebUI ? "yes" : "no"}`); } + if (selectedLaneNames.length > 0) { + console.log(`==> Selected lanes: ${selectedLaneNames.join(", ")}`); + } console.log(`==> Docker lane timings: ${timingStore.enabled ? timingsFile : "disabled"}`); console.log(`==> Live-test bundled plugin deps: ${baseEnv.OPENCLAW_DOCKER_BUILD_EXTENSIONS}`); const schedulerOptions = parseSchedulerOptions(process.env, parallelism); @@ -1332,13 +1414,16 @@ async function main() { if (buildEnabled) { const buildEntries = []; - if ([...orderedLanes, ...orderedTailLanes].some((poolLane) => poolLane.live)) { + const scheduledLanes = [...orderedLanes, ...orderedTailLanes]; + if (scheduledLanes.some((poolLane) => poolLane.live)) { buildEntries.push(["Build shared live-test image once", "pnpm test:docker:live-build"]); } - buildEntries.push([ - `Build shared Docker E2E image once: ${baseEnv.OPENCLAW_DOCKER_E2E_IMAGE}`, - "pnpm test:docker:e2e-build", - ]); + if (scheduledLanes.some((poolLane) => !poolLane.live)) { + buildEntries.push([ + `Build shared Docker E2E image once: ${baseEnv.OPENCLAW_DOCKER_E2E_IMAGE}`, + "pnpm test:docker:e2e-build", + ]); + } await runForegroundGroup(buildEntries, baseEnv); } else { console.log(`==> Shared Docker image builds: skipped`); @@ -1368,6 +1453,7 @@ async function main() { image: baseEnv.OPENCLAW_DOCKER_E2E_IMAGE, lanes: allResults, profile, + selectedLanes: selectedLaneNames.length > 0 ? selectedLaneNames : undefined, startedAt: runStartedAt, status: "failed", }); @@ -1395,6 +1481,7 @@ async function main() { image: baseEnv.OPENCLAW_DOCKER_E2E_IMAGE, lanes: allResults, profile, + selectedLanes: selectedLaneNames.length > 0 ? 
selectedLaneNames : undefined, startedAt: runStartedAt, status: "failed", }); @@ -1402,14 +1489,14 @@ async function main() { process.exit(1); } - if (profile === DEFAULT_PROFILE) { + if (profile === DEFAULT_PROFILE && selectedLaneNames.length === 0) { await runForeground( "Run cleanup smoke after parallel lanes", "pnpm test:docker:cleanup", baseEnv, ); } else { - console.log("==> Cleanup smoke after parallel lanes: skipped for release-path chunk"); + console.log("==> Cleanup smoke after parallel lanes: skipped for selected/release lanes"); } await writeTimingStore(timingStore, allResults); await writeRunSummary(logDir, { @@ -1418,6 +1505,7 @@ async function main() { image: baseEnv.OPENCLAW_DOCKER_E2E_IMAGE, lanes: allResults, profile, + selectedLanes: selectedLaneNames.length > 0 ? selectedLaneNames : undefined, startedAt: runStartedAt, status: "passed", });