ci: fan out qa lab lanes

2026-04-28 20:46:57 +02:00 · 2026-04-23 06:14:54 +01:00
parent 76ab7c5b05
commit c78562d8a2
6 changed files with 224 additions and 21 deletions
--- a/.github/workflows/openclaw-release-checks.yml
+++ b/.github/workflows/openclaw-release-checks.yml
@@ -123,7 +123,7 @@ jobs:
            echo "- Validated SHA: \`${RELEASE_SHA}\`"
            echo "- Cross-OS provider: \`${RELEASE_PROVIDER}\`"
            echo "- Cross-OS mode: \`${RELEASE_MODE}\`"
-            echo "- This run will execute cross-OS release validation, QA Lab parity/live lanes, and the non-Parallels Docker/live/openwebui coverage from the CI migration plan."
+            echo "- This run will execute cross-OS release validation, QA Lab parity, Matrix, and Telegram lanes, and the non-Parallels Docker/live/openwebui coverage from the CI migration plan."
          } >> "$GITHUB_STEP_SUMMARY"

  cross_os_release_checks:
@@ -274,6 +274,76 @@ jobs:
          retention-days: 14
          if-no-files-found: warn

+  qa_live_matrix_release_checks:  # release-check job: build private QA runtime, run `qa matrix` live, upload its output dir
+    name: Run QA Lab live Matrix lane
+    needs: [resolve_target]  # supplies the ref checked out below and the sha used in the artifact name
+    runs-on: blacksmith-32vcpu-ubuntu-2404
+    timeout-minutes: 60
+    permissions:
+      contents: read
+      pull-requests: read
+    environment: qa-live-shared
+    env:
+      OPENCLAW_BUILD_PRIVATE_QA: "1"
+      OPENCLAW_ENABLE_PRIVATE_QA_CLI: "1"
+    steps:
+      - name: Checkout selected ref
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ needs.resolve_target.outputs.ref }}
+          fetch-depth: 1
+
+      - name: Setup Node environment
+        uses: ./.github/actions/setup-node-env
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          pnpm-version: ${{ env.PNPM_VERSION }}
+          install-bun: "true"
+
+      - name: Validate required QA credential env  # runs before the build so a missing key fails the job early
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          if [[ -z "${OPENAI_API_KEY:-}" ]]; then
+            echo "Missing required OPENAI_API_KEY." >&2
+            exit 1
+          fi
+
+      - name: Build private QA runtime
+        run: pnpm build
+
+      - name: Run Matrix live lane
+        id: run_lane  # the upload step reads this step's `output_dir` output
+        shell: bash
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1"
+        run: |
+          set -euo pipefail
+
+          output_dir=".artifacts/qa-e2e/matrix-live-release-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
+          echo "output_dir=${output_dir}" >> "$GITHUB_OUTPUT"
+
+          pnpm openclaw qa matrix \
+            --repo-root . \
+            --output-dir "${output_dir}" \
+            --provider-mode live-frontier \
+            --model openai/gpt-5.4 \
+            --alt-model openai/gpt-5.4 \
+            --fast
+
+      - name: Upload Matrix QA artifacts
+        if: always() && steps.run_lane.outputs.output_dir != ''  # still uploads after a lane failure; skipped when run_lane never ran and `path` would be empty
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-qa-live-matrix-${{ needs.resolve_target.outputs.sha }}
+          path: ${{ steps.run_lane.outputs.output_dir }}
+          retention-days: 14
+          if-no-files-found: warn
+
  qa_live_telegram_release_checks:
    name: Run QA Lab live Telegram lane
    needs: [resolve_target]
--- a/.github/workflows/parity-gate.yml
+++ b/.github/workflows/parity-gate.yml
@@ -13,8 +13,6 @@ on:
      - "src/gateway/**"
      - "src/media/**"
      - ".github/workflows/parity-gate.yml"
-  schedule:
-    - cron: "17 3 * * *"
  workflow_dispatch:

 permissions:
--- a/.github/workflows/qa-live-telegram-convex.yml
+++ b/.github/workflows/qa-live-telegram-convex.yml
@@ -1,4 +1,4 @@
-name: QA-Lab - Live Telegram, Live Frontier
+name: QA-Lab - All Lanes

 on:
  schedule:
@@ -20,7 +20,7 @@ permissions:
  pull-requests: read

 concurrency:
-  group: qa-lab-live-telegram-live-frontier-${{ github.event_name == 'workflow_dispatch' && inputs.ref || github.sha }}
+  group: qa-lab-all-lanes-${{ github.event_name == 'workflow_dispatch' && inputs.ref || github.sha }}
  cancel-in-progress: false

 env:
@@ -115,6 +115,140 @@ jobs:
            echo "Trust reason: \`$trusted_reason\`"
          } >> "$GITHUB_STEP_SUMMARY"

+  run_mock_parity:  # mock-provider parity gate: two `qa suite` lanes, then a parity report over their summaries
+    name: Run QA Lab parity gate
+    needs: [validate_selected_ref]  # supplies selected_sha for the checkout below
+    runs-on: blacksmith-32vcpu-ubuntu-2404
+    timeout-minutes: 30
+    env:
+      QA_PARITY_CONCURRENCY: "1"  # passed as --concurrency to both suite lanes
+      OPENCLAW_QA_TRANSPORT_READY_TIMEOUT_MS: "180000"
+      OPENAI_API_KEY: ""  # provider credentials below are blanked; presumably so the mock lanes never use live keys - confirm
+      ANTHROPIC_API_KEY: ""
+      OPENCLAW_LIVE_OPENAI_KEY: ""
+      OPENCLAW_LIVE_ANTHROPIC_KEY: ""
+      OPENCLAW_LIVE_GEMINI_KEY: ""
+      OPENCLAW_LIVE_SETUP_TOKEN_VALUE: ""
+    steps:
+      - name: Checkout selected ref
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ needs.validate_selected_ref.outputs.selected_sha }}
+          fetch-depth: 1
+
+      - name: Setup Node environment
+        uses: ./.github/actions/setup-node-env
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          pnpm-version: ${{ env.PNPM_VERSION }}
+          install-bun: "true"
+
+      - name: Build private QA runtime
+        run: pnpm build
+
+      - name: Run GPT-5.4 lane  # writes to .artifacts/qa-e2e/gpt54, read later as the candidate summary
+        run: |
+          pnpm openclaw qa suite \
+            --provider-mode mock-openai \
+            --parity-pack agentic \
+            --concurrency "${QA_PARITY_CONCURRENCY}" \
+            --model openai/gpt-5.4 \
+            --alt-model openai/gpt-5.4-alt \
+            --output-dir .artifacts/qa-e2e/gpt54
+
+      - name: Run Opus 4.6 lane  # writes to .artifacts/qa-e2e/opus46, read later as the baseline summary
+        run: |
+          pnpm openclaw qa suite \
+            --provider-mode mock-openai \
+            --parity-pack agentic \
+            --concurrency "${QA_PARITY_CONCURRENCY}" \
+            --model anthropic/claude-opus-4-6 \
+            --alt-model anthropic/claude-sonnet-4-6 \
+            --output-dir .artifacts/qa-e2e/opus46
+
+      - name: Generate parity report  # candidate = gpt54 summary, baseline = opus46 summary
+        run: |
+          pnpm openclaw qa parity-report \
+            --repo-root . \
+            --candidate-summary .artifacts/qa-e2e/gpt54/qa-suite-summary.json \
+            --baseline-summary .artifacts/qa-e2e/opus46/qa-suite-summary.json \
+            --candidate-label openai/gpt-5.4 \
+            --baseline-label anthropic/claude-opus-4-6 \
+            --output-dir .artifacts/qa-e2e/parity
+
+      - name: Upload parity artifacts
+        if: always()  # upload whatever was produced even when an earlier step failed
+        uses: actions/upload-artifact@v4
+        with:
+          name: qa-parity-${{ github.run_id }}-${{ github.run_attempt }}
+          path: .artifacts/qa-e2e/  # parent of the gpt54, opus46 and parity output dirs
+          retention-days: 14
+          if-no-files-found: warn
+
+  run_live_matrix:  # live Matrix lane: build private QA runtime, run `qa matrix` live, upload its output dir
+    name: Run Matrix live QA lane
+    needs: [authorize_actor, validate_selected_ref]  # validate_selected_ref supplies selected_sha for the checkout
+    runs-on: blacksmith-32vcpu-ubuntu-2404
+    timeout-minutes: 60
+    environment: qa-live-shared
+    steps:
+      - name: Checkout selected ref
+        uses: actions/checkout@v6
+        with:
+          ref: ${{ needs.validate_selected_ref.outputs.selected_sha }}
+          fetch-depth: 1
+
+      - name: Setup Node environment
+        uses: ./.github/actions/setup-node-env
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          pnpm-version: ${{ env.PNPM_VERSION }}
+          install-bun: "true"
+
+      - name: Validate required QA credential env  # runs before the build so a missing key fails the job early
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        shell: bash
+        run: |
+          set -euo pipefail
+
+          if [[ -z "${OPENAI_API_KEY:-}" ]]; then
+            echo "Missing required OPENAI_API_KEY." >&2
+            exit 1
+          fi
+
+      - name: Build private QA runtime
+        run: pnpm build
+
+      - name: Run Matrix live lane
+        id: run_lane  # the upload step reads this step's `output_dir` output
+        shell: bash
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          OPENCLAW_QA_REDACT_PUBLIC_METADATA: "1"
+        run: |
+          set -euo pipefail
+
+          output_dir=".artifacts/qa-e2e/matrix-live-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"
+          echo "output_dir=${output_dir}" >> "$GITHUB_OUTPUT"
+
+          pnpm openclaw qa matrix \
+            --repo-root . \
+            --output-dir "${output_dir}" \
+            --provider-mode live-frontier \
+            --model openai/gpt-5.4 \
+            --alt-model openai/gpt-5.4 \
+            --fast
+
+      - name: Upload Matrix QA artifacts
+        if: always() && steps.run_lane.outputs.output_dir != ''  # still uploads after a lane failure; skipped when run_lane never ran and `path` would be empty
+        uses: actions/upload-artifact@v4
+        with:
+          name: qa-live-matrix-${{ github.run_id }}-${{ github.run_attempt }}
+          path: ${{ steps.run_lane.outputs.output_dir }}
+          retention-days: 14
+          if-no-files-found: warn
+
  run_live_telegram:
    name: Run Telegram live QA lane with Convex leases
    needs: [authorize_actor, validate_selected_ref]
--- a/docs/ci.md
+++ b/docs/ci.md
@@ -10,13 +10,14 @@ read_when:

 The CI runs on every push to `main` and every pull request. It uses smart scoping to skip expensive jobs when only unrelated areas changed.

-QA Lab has two dedicated CI lanes outside the main smart-scoped workflow. The
-`Parity gate` workflow runs on matching PR changes, every night on `main`, and
-manual dispatch; it builds the private QA runtime and compares the mock
-GPT-5.4 and Opus 4.6 agentic packs. The `QA-Lab - Live Telegram, Live Frontier`
-workflow runs nightly on `main` and on manual dispatch; it uses the
-`qa-live-shared` environment plus Convex leases for the live Telegram lane.
-`OpenClaw Release Checks` also runs both QA Lab lanes before release approval.
+QA Lab has dedicated CI lanes outside the main smart-scoped workflow. The
+`Parity gate` workflow runs on matching PR changes and manual dispatch; it
+builds the private QA runtime and compares the mock GPT-5.4 and Opus 4.6
+agentic packs. The `QA-Lab - All Lanes` workflow runs nightly on `main` and on
+manual dispatch; it fans out the mock parity gate, live Matrix lane, and live
+Telegram lane as parallel jobs. The live jobs use the `qa-live-shared`
+environment, and the Telegram lane uses Convex leases. `OpenClaw Release
+Checks` also runs the same QA Lab lanes before release approval.

 ## Job Overview

--- a/docs/help/testing.md
+++ b/docs/help/testing.md
@@ -51,11 +51,11 @@ Tip: when you only need one failing case, prefer narrowing live tests via the al

 These commands sit beside the main test suites when you need QA-lab realism:

-CI runs QA Lab in dedicated workflows. `Parity gate` runs on matching PRs,
-nightly on `main`, and from manual dispatch with mock providers. `QA-Lab - Live
-Telegram, Live Frontier` runs nightly on `main` and from manual dispatch with
-Convex-managed live Telegram credentials. `OpenClaw Release Checks` runs both
-lanes before release approval.
+CI runs QA Lab in dedicated workflows. `Parity gate` runs on matching PRs and
+from manual dispatch with mock providers. `QA-Lab - All Lanes` runs nightly on
+`main` and from manual dispatch with the mock parity gate, live Matrix lane, and
+Convex-managed live Telegram lane as parallel jobs. `OpenClaw Release Checks`
+runs the same lanes before release approval.

 - `pnpm openclaw qa suite`
  - Runs repo-backed QA scenarios directly on the host.
--- a/docs/reference/RELEASING.md
+++ b/docs/reference/RELEASING.md
@@ -54,9 +54,9 @@ OpenClaw has three public release lanes:
 - Run `pnpm release:check` before every tagged release
 - Release checks now run in a separate manual workflow:
  `OpenClaw Release Checks`
- `OpenClaw Release Checks` also runs the QA Lab mock parity gate and the live
-  Telegram QA lane before release approval. The live lane uses the
-  `qa-live-shared` environment and Convex CI credential leases.
+- `OpenClaw Release Checks` also runs the QA Lab mock parity gate plus the live
+  Matrix and Telegram QA lanes before release approval. The live lanes use the
+  `qa-live-shared` environment; Telegram also uses Convex CI credential leases.
 - Cross-OS install and upgrade runtime validation is dispatched from the
  private caller workflow
  `openclaw/releases-private/.github/workflows/openclaw-cross-os-release-checks.yml`,
@@ -169,7 +169,7 @@ When cutting a stable npm release:
   when you intentionally want a direct stable publish
 3. Run `OpenClaw Release Checks` separately with the same tag or the
   full current workflow-branch commit SHA when you want live prompt cache,
-   QA Lab parity, and live Telegram coverage
+   QA Lab parity, Matrix, and Telegram coverage
   - This is separate on purpose so live coverage stays available without
     recoupling long-running or flaky checks to the publish workflow
 4. Save the successful `preflight_run_id`