fix(qwen): honor chat-template thinking level

This commit is contained in:
Peter Steinberger
2026-05-17 02:19:01 +01:00
parent f453904165
commit ffc7bda443
3 changed files with 137 additions and 17 deletions

View File

@@ -30,6 +30,7 @@ Docs: https://docs.openclaw.ai
- Providers/Anthropic-messages: extract `reasoning_content` from `thinking` blocks during assistant replay so proxy providers that route through the Anthropic-messages transport preserve reasoning context across tool-call follow-up turns. Thanks @Sunnyone2three.
- Mac app: let menu gateway/session error text wrap across a few lines and stop rebuilding dynamic Context/Gateway menu rows while the menu is open, reducing flicker.
- Mac app: make device pairing approval sheets friendlier, with concise Mac/device copy, shortened identifiers, friendly scope labels, and Approve as the primary action.
- Providers/Qwen: honor session thinking level for `qwen-chat-template` payloads so `/think off` disables nested llama.cpp chat-template thinking controls. Fixes #82768. Thanks @bfox55.
- Feishu/wiki: reject numeric wiki space IDs before creating Lark clients and keep numeric-looking IDs documented as quoted opaque strings, preventing JavaScript precision loss in knowledge base calls. Fixes #45301. (#82769) Thanks @hyspacex.
- Control UI: simplify Talk settings to Voice, Model, and Sensitivity defaults, with provider, transport, exact VAD, and timing controls behind Advanced.
- Telegram: let catch-all mention patterns match captionless group photos, so media-only group messages reach the agent when the group is intentionally configured to respond to all messages. Fixes #44833. (#82756) Thanks @IWhatsskill.

View File

@@ -5,6 +5,7 @@ import { createQwenThinkingWrapper, wrapQwenProviderStream } from "./stream.js";
function capturePayload(params: {
thinkingLevel?: "off" | "low" | "medium" | "high" | "xhigh" | "max";
thinkingFormat?: string;
reasoning?: unknown;
initialPayload?: Record<string, unknown>;
model?: Partial<Model<"openai-completions">>;
@@ -17,7 +18,11 @@ function capturePayload(params: {
return {} as ReturnType<StreamFn>;
};
const wrapped = createQwenThinkingWrapper(baseStreamFn, params.thinkingLevel ?? "high");
const wrapped = createQwenThinkingWrapper(
baseStreamFn,
params.thinkingLevel ?? "high",
params.thinkingFormat,
);
void wrapped(
{
api: "openai-completions",
@@ -56,6 +61,37 @@ describe("createQwenThinkingWrapper", () => {
expect(capturePayload({ thinkingLevel: "high" })).toEqual({ enable_thinking: true });
});
it("overrides qwen-chat-template thinking with the session level", () => {
  // The initial payload arrives with thinking switched on in three places:
  // the nested chat-template kwargs, the top-level flag, and reasoning_effort.
  const patched = capturePayload({
    thinkingFormat: "qwen-chat-template",
    thinkingLevel: "off",
    initialPayload: {
      chat_template_kwargs: { enable_thinking: true, preserve_thinking: true },
      enable_thinking: true,
      reasoning_effort: "high",
    },
  });

  // With the level "off", only the nested kwargs should remain, with
  // enable_thinking flipped to false and preserve_thinking left as provided.
  expect(patched).toEqual({
    chat_template_kwargs: { enable_thinking: false, preserve_thinking: true },
  });
});
it("uses the runtime model qwen-chat-template format when the wrapper context omits it", () => {
  // No thinkingFormat is handed to the wrapper here; the format is only
  // present on the model's compat settings.
  const patched = capturePayload({
    thinkingLevel: "off",
    model: { compat: { thinkingFormat: "qwen-chat-template" } },
    initialPayload: {
      chat_template_kwargs: { enable_thinking: true },
      enable_thinking: true,
    },
  });

  // The nested flag is expected to be turned off and preserve_thinking to be
  // filled in, with no top-level enable_thinking left behind.
  expect(patched).toEqual({
    chat_template_kwargs: { enable_thinking: false, preserve_thinking: true },
  });
});
it("skips non-reasoning and non-completions models", () => {
expect(capturePayload({ model: { reasoning: false } })).toStrictEqual({});
expect(capturePayload({ model: { api: "openai-responses" as never } })).toStrictEqual({});
@@ -64,19 +100,18 @@ describe("createQwenThinkingWrapper", () => {
describe("wrapQwenProviderStream", () => {
it("only registers for Qwen-family OpenAI-compatible providers", () => {
expect(
wrapQwenProviderStream({
provider: "qwencloud",
modelId: "qwen3.6-plus",
model: {
api: "openai-completions",
provider: "qwen",
id: "qwen3.6-plus",
reasoning: true,
} as Model<"openai-completions">,
streamFn: undefined,
} as never),
).toBeTypeOf("function");
const streamFn = wrapQwenProviderStream({
provider: "qwencloud",
modelId: "qwen3.6-plus",
model: {
api: "openai-completions",
provider: "qwen",
id: "qwen3.6-plus",
reasoning: true,
} as Model<"openai-completions">,
streamFn: undefined,
} as never);
expect(streamFn).toBeTypeOf("function");
expect(
wrapQwenProviderStream({
@@ -91,4 +126,46 @@ describe("wrapQwenProviderStream", () => {
} as never),
).toBeUndefined();
});
it("passes qwen-chat-template format to the Qwen wrapper", () => {
  // Holds whatever payload the fake stream ends up with after the
  // onPayload hook has had a chance to patch it.
  let sentPayload: Record<string, unknown> = {};

  // Stand-in for the underlying stream: builds a payload with thinking
  // enabled, runs the payload hook, then records the (possibly mutated) object.
  const fakeStream: StreamFn = (_model, _context, options) => {
    const outgoing = {
      chat_template_kwargs: { enable_thinking: true },
      enable_thinking: true,
    };
    options?.onPayload?.(outgoing, _model);
    sentPayload = outgoing;
    return {} as ReturnType<StreamFn>;
  };

  // The provider context carries the chat-template format on model.compat
  // and a session thinking level of "off".
  const providerStream = wrapQwenProviderStream({
    provider: "qwen",
    modelId: "qwen3.6-plus",
    model: {
      api: "openai-completions",
      provider: "qwen",
      id: "qwen3.6-plus",
      reasoning: true,
      compat: { thinkingFormat: "qwen-chat-template" },
    } as Model<"openai-completions">,
    streamFn: fakeStream,
    thinkingLevel: "off",
  } as never);

  // The model passed at call time deliberately has no compat block, so the
  // format can only have come from the provider context above.
  void providerStream?.(
    {
      api: "openai-completions",
      provider: "qwen",
      id: "qwen3.6-plus",
      reasoning: true,
    } as Model<"openai-completions">,
    { messages: [] } as Context,
    {},
  );

  expect(sentPayload).toStrictEqual({
    chat_template_kwargs: { enable_thinking: false, preserve_thinking: true },
  });
});
});

View File

@@ -7,6 +7,7 @@ import {
} from "openclaw/plugin-sdk/provider-stream-shared";
type QwenThinkingLevel = ProviderWrapStreamFnContext["thinkingLevel"];
type QwenThinkingFormat = string | undefined;
function isQwenProviderId(providerId: string): boolean {
const normalized = normalizeProviderId(providerId);
@@ -18,15 +19,52 @@ function isQwenProviderId(providerId: string): boolean {
);
}
/**
 * Writes the thinking toggle into `payload.chat_template_kwargs`.
 *
 * If the payload already holds a non-array object under that key, its entries
 * are copied into a fresh object, `enable_thinking` is overwritten with
 * `enabled`, and `preserve_thinking` is set to `true` only when the key is
 * absent. Any other existing value (missing, `null`, a primitive, or an array)
 * is replaced with `{ enable_thinking: enabled, preserve_thinking: true }`.
 *
 * @param payload - Request payload; its `chat_template_kwargs` property is reassigned in place.
 * @param enabled - Value to store under `enable_thinking`.
 */
function setQwenChatTemplateThinking(payload: Record<string, unknown>, enabled: boolean): void {
  const current = payload.chat_template_kwargs;
  const isKwargsRecord =
    typeof current === "object" && current !== null && !Array.isArray(current);

  // Nothing usable to merge with: install the default kwargs outright.
  if (!isKwargsRecord) {
    payload.chat_template_kwargs = {
      enable_thinking: enabled,
      preserve_thinking: true,
    };
    return;
  }

  // Copy rather than mutate, so the caller's original kwargs object is untouched.
  const merged: Record<string, unknown> = {
    ...(current as Record<string, unknown>),
    enable_thinking: enabled,
  };
  // Respect an explicit preserve_thinking from the caller; default it otherwise.
  if (!Object.hasOwn(merged, "preserve_thinking")) {
    merged.preserve_thinking = true;
  }
  payload.chat_template_kwargs = merged;
}
/**
 * Extracts `compat.thinkingFormat` from a runtime model.
 *
 * @param model - The model handed to the stream function.
 * @returns The format string, or `undefined` when the model's `api` is not
 *   `"openai-completions"`, when `compat` is not a truthy object, or when
 *   `thinkingFormat` is not a string.
 */
function readQwenThinkingFormatFromModel(model: Parameters<StreamFn>[0]): QwenThinkingFormat {
  if (model.api !== "openai-completions") {
    return undefined;
  }

  const compat = model.compat;
  if (!compat || typeof compat !== "object") {
    return undefined;
  }

  // compat is only known to be an object here, so read the field as unknown
  // and verify it is a string before returning it.
  const format = (compat as { thinkingFormat?: unknown }).thinkingFormat;
  if (typeof format !== "string") {
    return undefined;
  }
  return format;
}
export function createQwenThinkingWrapper(
baseStreamFn: StreamFn | undefined,
thinkingLevel: QwenThinkingLevel,
thinkingFormat?: QwenThinkingFormat,
): StreamFn {
return createPayloadPatchStreamWrapper(
baseStreamFn,
({ payload: payloadObj, options }) => {
({ payload: payloadObj, model, options }) => {
const enableThinking = isOpenAICompatibleThinkingEnabled({ thinkingLevel, options });
payloadObj.enable_thinking = enableThinking;
const effectiveThinkingFormat = thinkingFormat ?? readQwenThinkingFormatFromModel(model);
if (effectiveThinkingFormat === "qwen-chat-template") {
setQwenChatTemplateThinking(payloadObj, enableThinking);
delete payloadObj.enable_thinking;
} else {
payloadObj.enable_thinking = enableThinking;
}
delete payloadObj.reasoning_effort;
delete payloadObj.reasoningEffort;
delete payloadObj.reasoning;
@@ -41,5 +79,9 @@ export function wrapQwenProviderStream(ctx: ProviderWrapStreamFnContext): Stream
if (!isQwenProviderId(ctx.provider) || (ctx.model && ctx.model.api !== "openai-completions")) {
return undefined;
}
return createQwenThinkingWrapper(ctx.streamFn, ctx.thinkingLevel);
return createQwenThinkingWrapper(
ctx.streamFn,
ctx.thinkingLevel,
ctx.model ? readQwenThinkingFormatFromModel(ctx.model) : undefined,
);
}