From c78562d8a2d09d657d6c1fe10205eadca43d5798 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 23 Apr 2026 06:14:54 +0100 Subject: [PATCH] ci: fan out qa lab lanes --- .github/workflows/openclaw-release-checks.yml | 72 ++++++++- .github/workflows/parity-gate.yml | 2 - .github/workflows/qa-live-telegram-convex.yml | 138 +++++++++++++++++- docs/ci.md | 15 +- docs/help/testing.md | 10 +- docs/reference/RELEASING.md | 8 +- 6 files changed, 224 insertions(+), 21 deletions(-) diff --git a/.github/workflows/openclaw-release-checks.yml b/.github/workflows/openclaw-release-checks.yml index f1abaae0b2c..3cc744c4854 100644 --- a/.github/workflows/openclaw-release-checks.yml +++ b/.github/workflows/openclaw-release-checks.yml @@ -123,7 +123,7 @@ jobs: echo "- Validated SHA: \`${RELEASE_SHA}\`" echo "- Cross-OS provider: \`${RELEASE_PROVIDER}\`" echo "- Cross-OS mode: \`${RELEASE_MODE}\`" - echo "- This run will execute cross-OS release validation, QA Lab parity/live lanes, and the non-Parallels Docker/live/openwebui coverage from the CI migration plan." + echo "- This run will execute cross-OS release validation, QA Lab parity, Matrix, and Telegram lanes, and the non-Parallels Docker/live/openwebui coverage from the CI migration plan." } >> "$GITHUB_STEP_SUMMARY" cross_os_release_checks: @@ -274,6 +274,76 @@ jobs: retention-days: 14 if-no-files-found: warn + qa_live_matrix_release_checks: + name: Run QA Lab live Matrix lane + needs: [resolve_target] + runs-on: blacksmith-32vcpu-ubuntu-2404 + timeout-minutes: 60 + permissions: + contents: read + pull-requests: read + environment: qa-live-shared + env: + OPENCLAW_BUILD_PRIVATE_QA: "1" + OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1" + steps: + - name: Checkout selected ref + uses: actions/checkout@v6 + with: + ref: ${{ needs.resolve_target.outputs.ref }} + fetch-depth: 1 + + - name: Setup Node environment + uses: ./.github/actions/setup-node-env + with: + node-version: ${{ env.NODE_VERSION }} + pnpm-version: ${{ env.PNPM_VERSION }} + install-bun: "true" + + - name: Validate required QA credential env + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + shell: bash + run: | + set -euo pipefail + + if [[ -z "${OPENAI_API_KEY:-}" ]]; then + echo "Missing required OPENAI_API_KEY." >&2 + exit 1 + fi + + - name: Build private QA runtime + run: pnpm build + + - name: Run Matrix live lane + id: run_lane + shell: bash + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1" + run: | + set -euo pipefail + + output_dir=".artifacts/qa-e2e/matrix-live-release-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" + echo "output_dir=${output_dir}" >> "$GITHUB_OUTPUT" + + pnpm openclaw qa matrix \ + --repo-root . \ + --output-dir "${output_dir}" \ + --provider-mode live-frontier \ + --model openai/gpt-5.4 \ + --alt-model openai/gpt-5.4 \ + --fast + + - name: Upload Matrix QA artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: release-qa-live-matrix-${{ needs.resolve_target.outputs.sha }} + path: ${{ steps.run_lane.outputs.output_dir }} + retention-days: 14 + if-no-files-found: warn + qa_live_telegram_release_checks: name: Run QA Lab live Telegram lane needs: [resolve_target] diff --git a/.github/workflows/parity-gate.yml b/.github/workflows/parity-gate.yml index 8faa935e5a1..f7df85a8dc4 100644 --- a/.github/workflows/parity-gate.yml +++ b/.github/workflows/parity-gate.yml @@ -13,8 +13,6 @@ on: - "src/gateway/**" - "src/media/**" - ".github/workflows/parity-gate.yml" - schedule: - - cron: "17 3 * * *" workflow_dispatch: permissions: diff --git a/.github/workflows/qa-live-telegram-convex.yml b/.github/workflows/qa-live-telegram-convex.yml index bf3c17670d5..04deb95dc69 100644 --- a/.github/workflows/qa-live-telegram-convex.yml +++ b/.github/workflows/qa-live-telegram-convex.yml @@ -1,4 +1,4 @@ -name: QA-Lab - Live Telegram, Live Frontier +name: QA-Lab - All Lanes on: schedule: @@ -20,7 +20,7 @@ permissions: pull-requests: read concurrency: - group: qa-lab-live-telegram-live-frontier-${{ github.event_name == 'workflow_dispatch' && inputs.ref || github.sha }} + group: qa-lab-all-lanes-${{ github.event_name == 'workflow_dispatch' && inputs.ref || github.sha }} cancel-in-progress: false env: @@ -115,6 +115,140 @@ jobs: echo "Trust reason: \`$trusted_reason\`" } >> "$GITHUB_STEP_SUMMARY" + run_mock_parity: + name: Run QA Lab parity gate + needs: [validate_selected_ref] + runs-on: blacksmith-32vcpu-ubuntu-2404 + timeout-minutes: 30 + env: + QA_PARITY_CONCURRENCY: "1" + OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000" + OPENAI_API_KEY: "" + ANTHROPIC_API_KEY: "" + OPENCLAW_LIVE_OPENAI_KEY: "" + OPENCLAW_LIVE_ANTHROPIC_KEY: "" + OPENCLAW_LIVE_GEMINI_KEY: "" + OPENCLAW_LIVE_SETUP_TOKEN_VALUE: "" + steps: + - name: Checkout selected ref + uses: actions/checkout@v6 + with: + ref: ${{ needs.validate_selected_ref.outputs.selected_sha }} + fetch-depth: 1 + + - name: Setup Node environment + uses: ./.github/actions/setup-node-env + with: + node-version: ${{ env.NODE_VERSION }} + pnpm-version: ${{ env.PNPM_VERSION }} + install-bun: "true" + + - name: Build private QA runtime + run: pnpm build + + - name: Run GPT-5.4 lane + run: | + pnpm openclaw qa suite \ + --provider-mode mock-openai \ + --parity-pack agentic \ + --concurrency "${QA_PARITY_CONCURRENCY}" \ + --model openai/gpt-5.4 \ + --alt-model openai/gpt-5.4-alt \ + --output-dir .artifacts/qa-e2e/gpt54 + + - name: Run Opus 4.6 lane + run: | + pnpm openclaw qa suite \ + --provider-mode mock-openai \ + --parity-pack agentic \ + --concurrency "${QA_PARITY_CONCURRENCY}" \ + --model anthropic/claude-opus-4-6 \ + --alt-model anthropic/claude-sonnet-4-6 \ + --output-dir .artifacts/qa-e2e/opus46 + + - name: Generate parity report + run: | + pnpm openclaw qa parity-report \ + --repo-root . \ + --candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json \ + --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \ + --candidate-label openai/gpt-5.4 \ + --baseline-label anthropic/claude-opus-4-6 \ + --output-dir .artifacts/qa-e2e/parity + + - name: Upload parity artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: qa-parity-${{ github.run_id }}-${{ github.run_attempt }} + path: .artifacts/qa-e2e/ + retention-days: 14 + if-no-files-found: warn + + run_live_matrix: + name: Run Matrix live QA lane + needs: [authorize_actor, validate_selected_ref] + runs-on: blacksmith-32vcpu-ubuntu-2404 + timeout-minutes: 60 + environment: qa-live-shared + steps: + - name: Checkout selected ref + uses: actions/checkout@v6 + with: + ref: ${{ needs.validate_selected_ref.outputs.selected_sha }} + fetch-depth: 1 + + - name: Setup Node environment + uses: ./.github/actions/setup-node-env + with: + node-version: ${{ env.NODE_VERSION }} + pnpm-version: ${{ env.PNPM_VERSION }} + install-bun: "true" + + - name: Validate required QA credential env + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + shell: bash + run: | + set -euo pipefail + + if [[ -z "${OPENAI_API_KEY:-}" ]]; then + echo "Missing required OPENAI_API_KEY." >&2 + exit 1 + fi + + - name: Build private QA runtime + run: pnpm build + + - name: Run Matrix live lane + id: run_lane + shell: bash + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1" + run: | + set -euo pipefail + + output_dir=".artifacts/qa-e2e/matrix-live-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}" + echo "output_dir=${output_dir}" >> "$GITHUB_OUTPUT" + + pnpm openclaw qa matrix \ + --repo-root . \ + --output-dir "${output_dir}" \ + --provider-mode live-frontier \ + --model openai/gpt-5.4 \ + --alt-model openai/gpt-5.4 \ + --fast + + - name: Upload Matrix QA artifacts + if: always() + uses: actions/upload-artifact@v4 + with: + name: qa-live-matrix-${{ github.run_id }}-${{ github.run_attempt }} + path: ${{ steps.run_lane.outputs.output_dir }} + retention-days: 14 + if-no-files-found: warn + run_live_telegram: name: Run Telegram live QA lane with Convex leases needs: [authorize_actor, validate_selected_ref] diff --git a/docs/ci.md b/docs/ci.md index 3080ccebb4a..390228e8c4c 100644 --- a/docs/ci.md +++ b/docs/ci.md @@ -10,13 +10,14 @@ read_when: The CI runs on every push to `main` and every pull request. It uses smart scoping to skip expensive jobs when only unrelated areas changed. -QA Lab has two dedicated CI lanes outside the main smart-scoped workflow. The -`Parity gate` workflow runs on matching PR changes, every night on `main`, and -manual dispatch; it builds the private QA runtime and compares the mock -GPT-5.4 and Opus 4.6 agentic packs. The `QA-Lab - Live Telegram, Live Frontier` -workflow runs nightly on `main` and on manual dispatch; it uses the -`qa-live-shared` environment plus Convex leases for the live Telegram lane. -`OpenClaw Release Checks` also runs both QA Lab lanes before release approval. +QA Lab has dedicated CI lanes outside the main smart-scoped workflow. The +`Parity gate` workflow runs on matching PR changes and manual dispatch; it +builds the private QA runtime and compares the mock GPT-5.4 and Opus 4.6 +agentic packs. The `QA-Lab - All Lanes` workflow runs nightly on `main` and on +manual dispatch; it fans out the mock parity gate, live Matrix lane, and live +Telegram lane as parallel jobs. The live jobs use the `qa-live-shared` +environment, and the Telegram lane uses Convex leases. `OpenClaw Release +Checks` also runs the same QA Lab lanes before release approval. ## Job Overview diff --git a/docs/help/testing.md b/docs/help/testing.md index 0346b485ea2..0032ddb2cf6 100644 --- a/docs/help/testing.md +++ b/docs/help/testing.md @@ -51,11 +51,11 @@ Tip: when you only need one failing case, prefer narrowing live tests via the al These commands sit beside the main test suites when you need QA-lab realism: -CI runs QA Lab in dedicated workflows. `Parity gate` runs on matching PRs, -nightly on `main`, and from manual dispatch with mock providers. `QA-Lab - Live -Telegram, Live Frontier` runs nightly on `main` and from manual dispatch with -Convex-managed live Telegram credentials. `OpenClaw Release Checks` runs both -lanes before release approval. +CI runs QA Lab in dedicated workflows. `Parity gate` runs on matching PRs and +from manual dispatch with mock providers. `QA-Lab - All Lanes` runs nightly on +`main` and from manual dispatch with the mock parity gate, live Matrix lane, and +Convex-managed live Telegram lane as parallel jobs. `OpenClaw Release Checks` +runs the same lanes before release approval. - `pnpm openclaw qa suite` - Runs repo-backed QA scenarios directly on the host. diff --git a/docs/reference/RELEASING.md b/docs/reference/RELEASING.md index b4ede21830b..ffc7fecde8c 100644 --- a/docs/reference/RELEASING.md +++ b/docs/reference/RELEASING.md @@ -54,9 +54,9 @@ OpenClaw has three public release lanes: - Run `pnpm release:check` before every tagged release - Release checks now run in a separate manual workflow: `OpenClaw Release Checks` -- `OpenClaw Release Checks` also runs the QA Lab mock parity gate and the live - Telegram QA lane before release approval. The live lane uses the - `qa-live-shared` environment and Convex CI credential leases. +- `OpenClaw Release Checks` also runs the QA Lab mock parity gate plus the live + Matrix and Telegram QA lanes before release approval. The live lanes use the + `qa-live-shared` environment; Telegram also uses Convex CI credential leases. - Cross-OS install and upgrade runtime validation is dispatched from the private caller workflow `openclaw/releases-private/.github/workflows/openclaw-cross-os-release-checks.yml`, @@ -169,7 +169,7 @@ When cutting a stable npm release: when you intentionally want a direct stable publish 3. Run `OpenClaw Release Checks` separately with the same tag or the full current workflow-branch commit SHA when you want live prompt cache, - QA Lab parity, and live Telegram coverage + QA Lab parity, Matrix, and Telegram coverage - This is separate on purpose so live coverage stays available without recoupling long-running or flaky checks to the publish workflow 4. Save the successful `preflight_run_id`