diff --git a/.agents/skills/openclaw-qa-testing/SKILL.md b/.agents/skills/openclaw-qa-testing/SKILL.md
index d0201975c56..bf006793641 100644
--- a/.agents/skills/openclaw-qa-testing/SKILL.md
+++ b/.agents/skills/openclaw-qa-testing/SKILL.md
@@ -57,12 +57,11 @@ Use `qa character-eval` for style/persona/vibe checks across multiple live model
 pnpm openclaw qa character-eval \
   --model openai/gpt-5.4,thinking=xhigh \
   --model openai/gpt-5.2,thinking=xhigh \
+  --model openai/gpt-5,thinking=xhigh \
   --model anthropic/claude-opus-4-6,thinking=high \
   --model anthropic/claude-sonnet-4-6,thinking=high \
-  --model minimax/MiniMax-M2.7,thinking=high \
   --model zai/glm-5.1,thinking=high \
   --model moonshot/kimi-k2.5,thinking=high \
-  --model qwen/qwen3.5-plus,thinking=high \
   --model google/gemini-3.1-pro-preview,thinking=high \
   --judge-model openai/gpt-5.4,thinking=xhigh,fast \
   --judge-model anthropic/claude-opus-4-6,thinking=high \
@@ -74,7 +73,7 @@ pnpm openclaw qa character-eval \
 - Runs local QA gateway child processes, not Docker.
 - Preferred model spec syntax is `provider/model,thinking=<level>[,fast|,no-fast|,fast=<bool>]` for both `--model` and `--judge-model`.
 - Do not add new examples with separate `--model-thinking`; keep that flag as legacy compatibility only.
-- Defaults to candidate models `openai/gpt-5.4`, `openai/gpt-5.2`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-6`, `minimax/MiniMax-M2.7`, `zai/glm-5.1`, `moonshot/kimi-k2.5`, `qwen/qwen3.5-plus`, and `google/gemini-3.1-pro-preview` when no `--model` is passed.
+- Defaults to candidate models `openai/gpt-5.4`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-6`, `anthropic/claude-sonnet-4-6`, `zai/glm-5.1`, `moonshot/kimi-k2.5`, and `google/gemini-3.1-pro-preview` when no `--model` is passed.
 - Candidate thinking defaults to `high`, with `xhigh` for OpenAI models that support it. Prefer inline `--model provider/model,thinking=<level>`; `--thinking <level>` and `--model-thinking <provider/model=level>` remain compatibility shims.
 - OpenAI candidate refs default to fast mode so priority processing is used where supported. Use inline `,fast`, `,no-fast`, or `,fast=false` for one model; use `--fast` only to force fast mode for every candidate.
 - Judges default to `openai/gpt-5.4,thinking=xhigh,fast` and `anthropic/claude-opus-4-6,thinking=high`.
diff --git a/docs/concepts/qa-e2e-automation.md b/docs/concepts/qa-e2e-automation.md
index 27eca3a5ccd..becd26cd1d6 100644
--- a/docs/concepts/qa-e2e-automation.md
+++ b/docs/concepts/qa-e2e-automation.md
@@ -89,12 +89,11 @@ refs and write a judged Markdown report:
 pnpm openclaw qa character-eval \
   --model openai/gpt-5.4,thinking=xhigh \
   --model openai/gpt-5.2,thinking=xhigh \
+  --model openai/gpt-5,thinking=xhigh \
   --model anthropic/claude-opus-4-6,thinking=high \
   --model anthropic/claude-sonnet-4-6,thinking=high \
-  --model minimax/MiniMax-M2.7,thinking=high \
   --model zai/glm-5.1,thinking=high \
   --model moonshot/kimi-k2.5,thinking=high \
-  --model qwen/qwen3.5-plus,thinking=high \
   --model google/gemini-3.1-pro-preview,thinking=high \
   --judge-model openai/gpt-5.4,thinking=xhigh,fast \
   --judge-model anthropic/claude-opus-4-6,thinking=high \
@@ -128,9 +127,9 @@ Candidate and judge model runs both default to concurrency 16. Lower
 `--concurrency` or `--judge-concurrency` when provider limits or local gateway
 pressure make a run too noisy.
 When no candidate `--model` is passed, the character eval defaults to
-`openai/gpt-5.4`, `openai/gpt-5.2`, `anthropic/claude-opus-4-6`,
-`anthropic/claude-sonnet-4-6`, `minimax/MiniMax-M2.7`, `zai/glm-5.1`,
-`moonshot/kimi-k2.5`, `qwen/qwen3.5-plus`, and
-`google/gemini-3.1-pro-preview` when no `--model` is passed.
+`openai/gpt-5.4`, `openai/gpt-5.2`, `openai/gpt-5`, `anthropic/claude-opus-4-6`,
+`anthropic/claude-sonnet-4-6`, `zai/glm-5.1`,
+`moonshot/kimi-k2.5`, and
+`google/gemini-3.1-pro-preview`.
 When no `--judge-model` is passed, the judges default to
 `openai/gpt-5.4,thinking=xhigh,fast` and
diff --git a/extensions/qa-lab/src/character-eval.test.ts b/extensions/qa-lab/src/character-eval.test.ts
index eaebb06b004..259b53d89ad 100644
--- a/extensions/qa-lab/src/character-eval.test.ts
+++ b/extensions/qa-lab/src/character-eval.test.ts
@@ -185,13 +185,12 @@ describe("runQaCharacterEval", () => {
         rankings: [
           { model: "openai/gpt-5.4", rank: 1, score: 8, summary: "ok" },
           { model: "openai/gpt-5.2", rank: 2, score: 7.5, summary: "ok" },
-          { model: "anthropic/claude-opus-4-6", rank: 3, score: 7, summary: "ok" },
-          { model: "anthropic/claude-sonnet-4-6", rank: 4, score: 6.8, summary: "ok" },
-          { model: "minimax/MiniMax-M2.7", rank: 5, score: 6.5, summary: "ok" },
+          { model: "openai/gpt-5", rank: 3, score: 7.2, summary: "ok" },
+          { model: "anthropic/claude-opus-4-6", rank: 4, score: 7, summary: "ok" },
+          { model: "anthropic/claude-sonnet-4-6", rank: 5, score: 6.8, summary: "ok" },
           { model: "zai/glm-5.1", rank: 6, score: 6.3, summary: "ok" },
           { model: "moonshot/kimi-k2.5", rank: 7, score: 6.2, summary: "ok" },
-          { model: "qwen/qwen3.5-plus", rank: 8, score: 6.1, summary: "ok" },
-          { model: "google/gemini-3.1-pro-preview", rank: 9, score: 6, summary: "ok" },
+          { model: "google/gemini-3.1-pro-preview", rank: 8, score: 6, summary: "ok" },
         ],
       }),
     );
@@ -204,23 +203,21 @@ describe("runQaCharacterEval", () => {
       runJudge,
     });
 
-    expect(runSuite).toHaveBeenCalledTimes(9);
+    expect(runSuite).toHaveBeenCalledTimes(8);
     expect(runSuite.mock.calls.map(([params]) => params.primaryModel)).toEqual([
       "openai/gpt-5.4",
       "openai/gpt-5.2",
+      "openai/gpt-5",
       "anthropic/claude-opus-4-6",
       "anthropic/claude-sonnet-4-6",
-      "minimax/MiniMax-M2.7",
       "zai/glm-5.1",
       "moonshot/kimi-k2.5",
-      "qwen/qwen3.5-plus",
       "google/gemini-3.1-pro-preview",
     ]);
     expect(runSuite.mock.calls.map(([params]) => params.thinkingDefault)).toEqual([
       "xhigh",
       "xhigh",
-      "high",
-      "high",
+      "xhigh",
       "high",
       "high",
       "high",
@@ -230,8 +227,7 @@ describe("runQaCharacterEval", () => {
     expect(runSuite.mock.calls.map(([params]) => params.fastMode)).toEqual([
       true,
       true,
-      false,
-      false,
+      true,
       false,
       false,
       false,
diff --git a/extensions/qa-lab/src/character-eval.ts b/extensions/qa-lab/src/character-eval.ts
index 384a3eeed07..8fb5fe63dfa 100644
--- a/extensions/qa-lab/src/character-eval.ts
+++ b/extensions/qa-lab/src/character-eval.ts
@@ -10,12 +10,11 @@ const DEFAULT_CHARACTER_SCENARIO_ID = "character-vibes-gollum";
 const DEFAULT_CHARACTER_EVAL_MODELS = Object.freeze([
   "openai/gpt-5.4",
   "openai/gpt-5.2",
+  "openai/gpt-5",
   "anthropic/claude-opus-4-6",
   "anthropic/claude-sonnet-4-6",
-  "minimax/MiniMax-M2.7",
   "zai/glm-5.1",
   "moonshot/kimi-k2.5",
-  "qwen/qwen3.5-plus",
   "google/gemini-3.1-pro-preview",
 ]);
 const DEFAULT_CHARACTER_THINKING: QaThinkingLevel = "high";
@@ -24,6 +23,7 @@ const DEFAULT_CHARACTER_THINKING_BY_MODEL: Readonly<Record<string, QaThinkingLev
   Object.freeze({
     "openai/gpt-5.4": "xhigh",
     "openai/gpt-5.2": "xhigh",
+    "openai/gpt-5": "xhigh",
   });
 const DEFAULT_JUDGE_MODELS = Object.freeze(["openai/gpt-5.4", "anthropic/claude-opus-4-6"]);
 const DEFAULT_JUDGE_THINKING: QaThinkingLevel = "xhigh";