diff --git a/extensions/elevenlabs/speech-provider.ts b/extensions/elevenlabs/speech-provider.ts index f1c42d65288..4155cd9b258 100644 --- a/extensions/elevenlabs/speech-provider.ts +++ b/extensions/elevenlabs/speech-provider.ts @@ -18,6 +18,10 @@ import { requireInRange, trimToUndefined, } from "openclaw/plugin-sdk/speech"; +import { + fetchWithSsrFGuard, + ssrfPolicyFromHttpBaseUrlAllowedHostname, +} from "openclaw/plugin-sdk/ssrf-runtime"; import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime"; import { resolveElevenLabsApiKeyWithProfileFallback } from "./config-api.js"; import { isValidElevenLabsVoiceId, normalizeElevenLabsBaseUrl } from "./shared.js"; @@ -293,30 +297,40 @@ export async function listElevenLabsVoices(params: { apiKey: string; baseUrl?: string; }): Promise { - const res = await fetch(`${normalizeElevenLabsBaseUrl(params.baseUrl)}/v1/voices`, { - headers: { - "xi-api-key": params.apiKey, + const normalizedBaseUrl = normalizeElevenLabsBaseUrl(params.baseUrl); + const { response, release } = await fetchWithSsrFGuard({ + url: `${normalizedBaseUrl}/v1/voices`, + init: { + headers: { + "xi-api-key": params.apiKey, + }, }, + policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(normalizedBaseUrl), + auditContext: "elevenlabs.voices", }); - await assertOkOrThrowProviderError(res, "ElevenLabs voices API error"); - const json = (await res.json()) as { - voices?: Array<{ - voice_id?: string; - name?: string; - category?: string; - description?: string; - }>; - }; - return Array.isArray(json.voices) - ? json.voices - .map((voice) => ({ - id: voice.voice_id?.trim() ?? "", - name: trimToUndefined(voice.name), - category: trimToUndefined(voice.category), - description: trimToUndefined(voice.description), - })) - .filter((voice) => voice.id.length > 0) - : []; + try { + await assertOkOrThrowProviderError(response, "ElevenLabs voices API error"); + const json = (await response.json()) as { + voices?: Array<{ + voice_id?: string; + name?: string; + category?: string; + description?: string; + }>; + }; + return Array.isArray(json.voices) + ? json.voices + .map((voice) => ({ + id: voice.voice_id?.trim() ?? "", + name: trimToUndefined(voice.name), + category: trimToUndefined(voice.category), + description: trimToUndefined(voice.description), + })) + .filter((voice) => voice.id.length > 0) + : []; + } finally { + await release(); + } } export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin { diff --git a/extensions/microsoft/speech-provider.ts b/extensions/microsoft/speech-provider.ts index 7e7ad2fdbd7..0a1381fdd1e 100644 --- a/extensions/microsoft/speech-provider.ts +++ b/extensions/microsoft/speech-provider.ts @@ -17,6 +17,10 @@ import type { SpeechVoiceOption, } from "openclaw/plugin-sdk/speech"; import { asBoolean, asFiniteNumber, asObject, trimToUndefined } from "openclaw/plugin-sdk/speech"; +import { + fetchWithSsrFGuard, + ssrfPolicyFromHttpBaseUrlAllowedHostname, +} from "openclaw/plugin-sdk/ssrf-runtime"; import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path"; import { edgeTTS, inferEdgeExtension } from "./tts.js"; @@ -138,39 +142,48 @@ export async function listMicrosoftVoices(): Promise { "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" + `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`; const headers = buildMicrosoftVoiceHeaders(); - const response = await fetch(url, { - headers, + const { response, release } = await fetchWithSsrFGuard({ + url, + init: { + headers, + }, + policy: ssrfPolicyFromHttpBaseUrlAllowedHostname("https://speech.platform.bing.com"), + auditContext: "microsoft.speech.voices", }); - if (!isDebugProxyGlobalFetchPatchInstalled()) { - captureHttpExchange({ - url, - method: "GET", - requestHeaders: headers, - response, - transport: "http", - meta: { - provider: "microsoft", - capability: "speech-voices", - }, - }); + try { + if (!isDebugProxyGlobalFetchPatchInstalled()) { + captureHttpExchange({ + url, + method: "GET", + requestHeaders: headers, + response, + transport: "http", + meta: { + provider: "microsoft", + capability: "speech-voices", + }, + }); + } + await assertOkOrThrowProviderError(response, "Microsoft voices API error"); + const voices = (await response.json()) as MicrosoftVoiceListEntry[]; + return Array.isArray(voices) + ? voices + .map((voice) => ({ + id: voice.ShortName?.trim() ?? "", + name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName), + category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0), + description: formatMicrosoftVoiceDescription(voice), + locale: trimToUndefined(voice.Locale), + gender: trimToUndefined(voice.Gender), + personalities: voice.VoiceTag?.VoicePersonalities?.filter( + (value): value is string => value.trim().length > 0, + ), + })) + .filter((voice) => voice.id.length > 0) + : []; + } finally { + await release(); } - await assertOkOrThrowProviderError(response, "Microsoft voices API error"); - const voices = (await response.json()) as MicrosoftVoiceListEntry[]; - return Array.isArray(voices) - ? voices - .map((voice) => ({ - id: voice.ShortName?.trim() ?? "", - name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName), - category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0), - description: formatMicrosoftVoiceDescription(voice), - locale: trimToUndefined(voice.Locale), - gender: trimToUndefined(voice.Gender), - personalities: voice.VoiceTag?.VoicePersonalities?.filter( - (value): value is string => value.trim().length > 0, - ), - })) - .filter((voice) => voice.id.length > 0) - : []; } export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin { diff --git a/extensions/minimax/tts.ts b/extensions/minimax/tts.ts index 2801a5c2e83..d5b3e07560d 100644 --- a/extensions/minimax/tts.ts +++ b/extensions/minimax/tts.ts @@ -1,4 +1,8 @@ import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http"; +import { + fetchWithSsrFGuard, + ssrfPolicyFromHttpBaseUrlAllowedHostname, +} from "openclaw/plugin-sdk/ssrf-runtime"; export const DEFAULT_MINIMAX_TTS_BASE_URL = "https://api.minimax.io"; @@ -51,38 +55,47 @@ export async function minimaxTTS(params: { const timeout = setTimeout(() => controller.abort(), timeoutMs); try { - const response = await fetch(`${baseUrl}/v1/t2a_v2`, { - method: "POST", - headers: { - Authorization: `Bearer ${apiKey}`, - "Content-Type": "application/json", + const { response, release } = await fetchWithSsrFGuard({ + url: `${baseUrl}/v1/t2a_v2`, + init: { + method: "POST", + headers: { + Authorization: `Bearer ${apiKey}`, + "Content-Type": "application/json", + }, + body: JSON.stringify({ + model, + text, + voice_setting: { + voice_id: voiceId, + speed, + vol, + pitch, + }, + audio_setting: { + format, + sample_rate: sampleRate, + }, + }), + signal: controller.signal, }, - body: JSON.stringify({ - model, - text, - voice_setting: { - voice_id: voiceId, - speed, - vol, - pitch, - }, - audio_setting: { - format, - sample_rate: sampleRate, - }, - }), - signal: controller.signal, + timeoutMs, + policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(baseUrl), + auditContext: "minimax.tts", }); + try { + await assertOkOrThrowProviderError(response, "MiniMax TTS API error"); - await assertOkOrThrowProviderError(response, "MiniMax TTS API error"); + const body = (await response.json()) as { data?: { audio?: string } }; + const hexAudio = body?.data?.audio; + if (!hexAudio) { + throw new Error("MiniMax TTS API returned no audio data"); + } - const body = (await response.json()) as { data?: { audio?: string } }; - const hexAudio = body?.data?.audio; - if (!hexAudio) { - throw new Error("MiniMax TTS API returned no audio data"); + return Buffer.from(hexAudio, "hex"); + } finally { + await release(); } - - return Buffer.from(hexAudio, "hex"); } finally { clearTimeout(timeout); }