fix: guard speech provider fetches

This commit is contained in:
Peter Steinberger
2026-04-24 20:51:18 +01:00
parent 25ad66520b
commit 7425cb0549
3 changed files with 120 additions and 80 deletions

View File

@@ -18,6 +18,10 @@ import {
requireInRange,
trimToUndefined,
} from "openclaw/plugin-sdk/speech";
import {
fetchWithSsrFGuard,
ssrfPolicyFromHttpBaseUrlAllowedHostname,
} from "openclaw/plugin-sdk/ssrf-runtime";
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
import { resolveElevenLabsApiKeyWithProfileFallback } from "./config-api.js";
import { isValidElevenLabsVoiceId, normalizeElevenLabsBaseUrl } from "./shared.js";
@@ -293,30 +297,40 @@ export async function listElevenLabsVoices(params: {
apiKey: string;
baseUrl?: string;
}): Promise<SpeechVoiceOption[]> {
const res = await fetch(`${normalizeElevenLabsBaseUrl(params.baseUrl)}/v1/voices`, {
headers: {
"xi-api-key": params.apiKey,
const normalizedBaseUrl = normalizeElevenLabsBaseUrl(params.baseUrl);
const { response, release } = await fetchWithSsrFGuard({
url: `${normalizedBaseUrl}/v1/voices`,
init: {
headers: {
"xi-api-key": params.apiKey,
},
},
policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(normalizedBaseUrl),
auditContext: "elevenlabs.voices",
});
await assertOkOrThrowProviderError(res, "ElevenLabs voices API error");
const json = (await res.json()) as {
voices?: Array<{
voice_id?: string;
name?: string;
category?: string;
description?: string;
}>;
};
return Array.isArray(json.voices)
? json.voices
.map((voice) => ({
id: voice.voice_id?.trim() ?? "",
name: trimToUndefined(voice.name),
category: trimToUndefined(voice.category),
description: trimToUndefined(voice.description),
}))
.filter((voice) => voice.id.length > 0)
: [];
try {
await assertOkOrThrowProviderError(response, "ElevenLabs voices API error");
const json = (await response.json()) as {
voices?: Array<{
voice_id?: string;
name?: string;
category?: string;
description?: string;
}>;
};
return Array.isArray(json.voices)
? json.voices
.map((voice) => ({
id: voice.voice_id?.trim() ?? "",
name: trimToUndefined(voice.name),
category: trimToUndefined(voice.category),
description: trimToUndefined(voice.description),
}))
.filter((voice) => voice.id.length > 0)
: [];
} finally {
await release();
}
}
export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin {

View File

@@ -17,6 +17,10 @@ import type {
SpeechVoiceOption,
} from "openclaw/plugin-sdk/speech";
import { asBoolean, asFiniteNumber, asObject, trimToUndefined } from "openclaw/plugin-sdk/speech";
import {
fetchWithSsrFGuard,
ssrfPolicyFromHttpBaseUrlAllowedHostname,
} from "openclaw/plugin-sdk/ssrf-runtime";
import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path";
import { edgeTTS, inferEdgeExtension } from "./tts.js";
@@ -138,39 +142,48 @@ export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
"https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
`?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`;
const headers = buildMicrosoftVoiceHeaders();
const response = await fetch(url, {
headers,
const { response, release } = await fetchWithSsrFGuard({
url,
init: {
headers,
},
policy: ssrfPolicyFromHttpBaseUrlAllowedHostname("https://speech.platform.bing.com"),
auditContext: "microsoft.speech.voices",
});
if (!isDebugProxyGlobalFetchPatchInstalled()) {
captureHttpExchange({
url,
method: "GET",
requestHeaders: headers,
response,
transport: "http",
meta: {
provider: "microsoft",
capability: "speech-voices",
},
});
try {
if (!isDebugProxyGlobalFetchPatchInstalled()) {
captureHttpExchange({
url,
method: "GET",
requestHeaders: headers,
response,
transport: "http",
meta: {
provider: "microsoft",
capability: "speech-voices",
},
});
}
await assertOkOrThrowProviderError(response, "Microsoft voices API error");
const voices = (await response.json()) as MicrosoftVoiceListEntry[];
return Array.isArray(voices)
? voices
.map((voice) => ({
id: voice.ShortName?.trim() ?? "",
name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName),
category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
description: formatMicrosoftVoiceDescription(voice),
locale: trimToUndefined(voice.Locale),
gender: trimToUndefined(voice.Gender),
personalities: voice.VoiceTag?.VoicePersonalities?.filter(
(value): value is string => value.trim().length > 0,
),
}))
.filter((voice) => voice.id.length > 0)
: [];
} finally {
await release();
}
await assertOkOrThrowProviderError(response, "Microsoft voices API error");
const voices = (await response.json()) as MicrosoftVoiceListEntry[];
return Array.isArray(voices)
? voices
.map((voice) => ({
id: voice.ShortName?.trim() ?? "",
name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName),
category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
description: formatMicrosoftVoiceDescription(voice),
locale: trimToUndefined(voice.Locale),
gender: trimToUndefined(voice.Gender),
personalities: voice.VoiceTag?.VoicePersonalities?.filter(
(value): value is string => value.trim().length > 0,
),
}))
.filter((voice) => voice.id.length > 0)
: [];
}
export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {

View File

@@ -1,4 +1,8 @@
import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http";
import {
fetchWithSsrFGuard,
ssrfPolicyFromHttpBaseUrlAllowedHostname,
} from "openclaw/plugin-sdk/ssrf-runtime";
export const DEFAULT_MINIMAX_TTS_BASE_URL = "https://api.minimax.io";
@@ -51,38 +55,47 @@ export async function minimaxTTS(params: {
const timeout = setTimeout(() => controller.abort(), timeoutMs);
try {
const response = await fetch(`${baseUrl}/v1/t2a_v2`, {
method: "POST",
headers: {
Authorization: `Bearer ${apiKey}`,
"Content-Type": "application/json",
const { response, release } = await fetchWithSsrFGuard({
url: `${baseUrl}/v1/t2a_v2`,
init: {
method: "POST",
headers: {
Authorization: `Bearer ${apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify({
model,
text,
voice_setting: {
voice_id: voiceId,
speed,
vol,
pitch,
},
audio_setting: {
format,
sample_rate: sampleRate,
},
}),
signal: controller.signal,
},
body: JSON.stringify({
model,
text,
voice_setting: {
voice_id: voiceId,
speed,
vol,
pitch,
},
audio_setting: {
format,
sample_rate: sampleRate,
},
}),
signal: controller.signal,
timeoutMs,
policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(baseUrl),
auditContext: "minimax.tts",
});
try {
await assertOkOrThrowProviderError(response, "MiniMax TTS API error");
await assertOkOrThrowProviderError(response, "MiniMax TTS API error");
const body = (await response.json()) as { data?: { audio?: string } };
const hexAudio = body?.data?.audio;
if (!hexAudio) {
throw new Error("MiniMax TTS API returned no audio data");
}
const body = (await response.json()) as { data?: { audio?: string } };
const hexAudio = body?.data?.audio;
if (!hexAudio) {
throw new Error("MiniMax TTS API returned no audio data");
return Buffer.from(hexAudio, "hex");
} finally {
await release();
}
return Buffer.from(hexAudio, "hex");
} finally {
clearTimeout(timeout);
}