mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-02 14:40:27 +02:00
fix(config): accept video and audio model inputs
Preserve configured audio/video model input modalities through provider catalog normalization. Fixes #20721. Thanks @alvinttang.
This commit is contained in:
@@ -74,6 +74,8 @@ Docs: https://docs.openclaw.ai
|
||||
- Channels/status: keep read-only channel lists on manifest and package metadata by default, loading setup runtime only for explicit fallback callers. Thanks @shakkernerd.
|
||||
- Plugins/onboarding: defer onboarding install-record index writes until the guarded config commit so setup failures cannot leave the plugin index ahead of `openclaw.json`. Thanks @shakkernerd.
|
||||
- Plugins/registry: resolve web provider ownership from the installed plugin index instead of broad manifest scans on secret, tool, and pricing paths. Thanks @shakkernerd.
|
||||
- Config/providers: accept `video` and `audio` in configured model `input` values and
|
||||
preserve them in provider catalog entries. Fixes #20721. Thanks @alvinttang.
|
||||
- TTS: strip model-emitted TTS directives from streamed block text before channel
|
||||
delivery, including directives split across adjacent blocks, while preserving
|
||||
the accumulated raw reply for final-mode synthesis. Fixes #38937.
|
||||
|
||||
@@ -269,7 +269,7 @@ export type LmstudioModelBase = {
|
||||
trainedForToolUse: boolean;
|
||||
loaded: boolean;
|
||||
reasoning: boolean;
|
||||
input: ModelDefinitionConfig["input"];
|
||||
input: Array<"text" | "image">;
|
||||
cost: ModelDefinitionConfig["cost"];
|
||||
contextWindow: number;
|
||||
contextTokens: number;
|
||||
|
||||
@@ -822,6 +822,9 @@ export async function prepareLmstudioDynamicModels(
|
||||
provider: PROVIDER_ID,
|
||||
api: ctx.providerConfig?.api ?? `openai-completions`,
|
||||
baseUrl,
|
||||
input: model.input.filter(
|
||||
(entry): entry is "text" | "image" => entry === "text" || entry === "image",
|
||||
),
|
||||
}),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
export type ModelInputType = "text" | "image" | "document";
|
||||
export type ModelInputType = "text" | "image" | "audio" | "video" | "document";
|
||||
|
||||
export type ModelCatalogEntry = {
|
||||
id: string;
|
||||
|
||||
@@ -2908,6 +2908,14 @@ export const GENERATED_BASE_CONFIG_SCHEMA: BaseConfigSchemaResponse = {
|
||||
type: "string",
|
||||
const: "image",
|
||||
},
|
||||
{
|
||||
type: "string",
|
||||
const: "video",
|
||||
},
|
||||
{
|
||||
type: "string",
|
||||
const: "audio",
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
@@ -80,7 +80,7 @@ export type ModelDefinitionConfig = {
|
||||
api?: ModelApi;
|
||||
baseUrl?: string;
|
||||
reasoning: boolean;
|
||||
input: Array<"text" | "image">;
|
||||
input: Array<"text" | "image" | "video" | "audio">;
|
||||
cost: {
|
||||
input: number;
|
||||
output: number;
|
||||
|
||||
@@ -312,7 +312,11 @@ export const ModelDefinitionSchema = z
|
||||
api: ModelApiSchema.optional(),
|
||||
baseUrl: z.string().min(1).optional(),
|
||||
reasoning: z.boolean().optional(),
|
||||
input: z.array(z.union([z.literal("text"), z.literal("image")])).optional(),
|
||||
input: z
|
||||
.array(
|
||||
z.union([z.literal("text"), z.literal("image"), z.literal("video"), z.literal("audio")]),
|
||||
)
|
||||
.optional(),
|
||||
cost: z
|
||||
.object({
|
||||
input: z.number().optional(),
|
||||
|
||||
@@ -41,7 +41,7 @@ export type LmstudioModelBase = {
|
||||
trainedForToolUse: boolean;
|
||||
loaded: boolean;
|
||||
reasoning: boolean;
|
||||
input: ModelDefinitionConfig["input"];
|
||||
input: Array<"text" | "image">;
|
||||
cost: ModelDefinitionConfig["cost"];
|
||||
contextWindow: number;
|
||||
contextTokens: number;
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
applyProviderNativeStreamingUsageCompat,
|
||||
readConfiguredProviderCatalogEntries,
|
||||
supportsNativeStreamingUsageCompat,
|
||||
} from "./provider-catalog-shared.js";
|
||||
import type { ModelDefinitionConfig } from "./provider-model-shared.js";
|
||||
@@ -54,3 +55,43 @@ describe("provider-catalog-shared native streaming usage compat", () => {
|
||||
expect(provider.models?.[1]?.compat?.supportsUsageInStreaming).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe("provider-catalog-shared configured catalog entries", () => {
|
||||
it("preserves configured audio and video input modalities", () => {
|
||||
expect(
|
||||
readConfiguredProviderCatalogEntries({
|
||||
providerId: "kilocode",
|
||||
config: {
|
||||
models: {
|
||||
providers: {
|
||||
kilocode: {
|
||||
baseUrl: "https://api.kilo.ai/api/gateway/",
|
||||
api: "openai-completions",
|
||||
models: [
|
||||
{
|
||||
id: "google/gemini-3-pro-preview",
|
||||
name: "Gemini 3 Pro Preview",
|
||||
input: ["text", "image", "video", "audio"],
|
||||
reasoning: true,
|
||||
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
||||
contextWindow: 1048576,
|
||||
maxTokens: 65536,
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}),
|
||||
).toEqual([
|
||||
{
|
||||
provider: "kilocode",
|
||||
id: "google/gemini-3-pro-preview",
|
||||
name: "Gemini 3 Pro Preview",
|
||||
input: ["text", "image", "video", "audio"],
|
||||
reasoning: true,
|
||||
contextWindow: 1048576,
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -23,7 +23,7 @@ export type ConfiguredProviderCatalogEntry = {
|
||||
provider: string;
|
||||
contextWindow?: number;
|
||||
reasoning?: boolean;
|
||||
input?: Array<"text" | "image" | "document">;
|
||||
input?: Array<"text" | "image" | "audio" | "video" | "document">;
|
||||
};
|
||||
|
||||
function normalizeConfiguredCatalogModelInput(
|
||||
@@ -33,8 +33,12 @@ function normalizeConfiguredCatalogModelInput(
|
||||
return undefined;
|
||||
}
|
||||
const normalized = input.filter(
|
||||
(item): item is "text" | "image" | "document" =>
|
||||
item === "text" || item === "image" || item === "document",
|
||||
(item): item is "text" | "image" | "audio" | "video" | "document" =>
|
||||
item === "text" ||
|
||||
item === "image" ||
|
||||
item === "audio" ||
|
||||
item === "video" ||
|
||||
item === "document",
|
||||
);
|
||||
return normalized.length > 0 ? normalized : undefined;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user