fix: guard speech provider fetches against SSRF

2026-04-28 12:36:55 +02:00 · 2026-04-24 20:51:18 +01:00
parent 25ad66520b
commit 7425cb0549
3 changed files with 120 additions and 80 deletions
--- a/extensions/elevenlabs/speech-provider.ts
+++ b/extensions/elevenlabs/speech-provider.ts
@@ -18,6 +18,10 @@ import {
  requireInRange,
  trimToUndefined,
 } from "openclaw/plugin-sdk/speech";
+import {
+  fetchWithSsrFGuard,
+  ssrfPolicyFromHttpBaseUrlAllowedHostname,
+} from "openclaw/plugin-sdk/ssrf-runtime";
 import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
 import { resolveElevenLabsApiKeyWithProfileFallback } from "./config-api.js";
 import { isValidElevenLabsVoiceId, normalizeElevenLabsBaseUrl } from "./shared.js";
@@ -293,30 +297,40 @@ export async function listElevenLabsVoices(params: {
  apiKey: string;
  baseUrl?: string;
 }): Promise<SpeechVoiceOption[]> {
-  const res = await fetch(`${normalizeElevenLabsBaseUrl(params.baseUrl)}/v1/voices`, {
-    headers: {
-      "xi-api-key": params.apiKey,
+  const normalizedBaseUrl = normalizeElevenLabsBaseUrl(params.baseUrl);
+  const { response, release } = await fetchWithSsrFGuard({
+    url: `${normalizedBaseUrl}/v1/voices`,
+    init: {
+      headers: {
+        "xi-api-key": params.apiKey,
+      },
    },
+    policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(normalizedBaseUrl),
+    auditContext: "elevenlabs.voices",
  });
-  await assertOkOrThrowProviderError(res, "ElevenLabs voices API error");
-  const json = (await res.json()) as {
-    voices?: Array<{
-      voice_id?: string;
-      name?: string;
-      category?: string;
-      description?: string;
-    }>;
-  };
-  return Array.isArray(json.voices)
-    ? json.voices
-        .map((voice) => ({
-          id: voice.voice_id?.trim() ?? "",
-          name: trimToUndefined(voice.name),
-          category: trimToUndefined(voice.category),
-          description: trimToUndefined(voice.description),
-        }))
-        .filter((voice) => voice.id.length > 0)
-    : [];
+  try {
+    await assertOkOrThrowProviderError(response, "ElevenLabs voices API error");
+    const json = (await response.json()) as {
+      voices?: Array<{
+        voice_id?: string;
+        name?: string;
+        category?: string;
+        description?: string;
+      }>;
+    };
+    return Array.isArray(json.voices)
+      ? json.voices
+          .map((voice) => ({
+            id: voice.voice_id?.trim() ?? "",
+            name: trimToUndefined(voice.name),
+            category: trimToUndefined(voice.category),
+            description: trimToUndefined(voice.description),
+          }))
+          .filter((voice) => voice.id.length > 0)
+      : [];
+  } finally {
+    await release();
+  }
 }

 export function buildElevenLabsSpeechProvider(): SpeechProviderPlugin {
--- a/extensions/microsoft/speech-provider.ts
+++ b/extensions/microsoft/speech-provider.ts
@@ -17,6 +17,10 @@ import type {
  SpeechVoiceOption,
 } from "openclaw/plugin-sdk/speech";
 import { asBoolean, asFiniteNumber, asObject, trimToUndefined } from "openclaw/plugin-sdk/speech";
+import {
+  fetchWithSsrFGuard,
+  ssrfPolicyFromHttpBaseUrlAllowedHostname,
+} from "openclaw/plugin-sdk/ssrf-runtime";
 import { resolvePreferredOpenClawTmpDir } from "openclaw/plugin-sdk/temp-path";
 import { edgeTTS, inferEdgeExtension } from "./tts.js";

@@ -138,39 +142,48 @@ export async function listMicrosoftVoices(): Promise<SpeechVoiceOption[]> {
    "https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list" +
    `?trustedclienttoken=${TRUSTED_CLIENT_TOKEN}`;
  const headers = buildMicrosoftVoiceHeaders();
-  const response = await fetch(url, {
-    headers,
+  const { response, release } = await fetchWithSsrFGuard({
+    url,
+    init: {
+      headers,
+    },
+    policy: ssrfPolicyFromHttpBaseUrlAllowedHostname("https://speech.platform.bing.com"),
+    auditContext: "microsoft.speech.voices",
  });
-  if (!isDebugProxyGlobalFetchPatchInstalled()) {
-    captureHttpExchange({
-      url,
-      method: "GET",
-      requestHeaders: headers,
-      response,
-      transport: "http",
-      meta: {
-        provider: "microsoft",
-        capability: "speech-voices",
-      },
-    });
+  try {
+    if (!isDebugProxyGlobalFetchPatchInstalled()) {
+      captureHttpExchange({
+        url,
+        method: "GET",
+        requestHeaders: headers,
+        response,
+        transport: "http",
+        meta: {
+          provider: "microsoft",
+          capability: "speech-voices",
+        },
+      });
+    }
+    await assertOkOrThrowProviderError(response, "Microsoft voices API error");
+    const voices = (await response.json()) as MicrosoftVoiceListEntry[];
+    return Array.isArray(voices)
+      ? voices
+          .map((voice) => ({
+            id: voice.ShortName?.trim() ?? "",
+            name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName),
+            category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
+            description: formatMicrosoftVoiceDescription(voice),
+            locale: trimToUndefined(voice.Locale),
+            gender: trimToUndefined(voice.Gender),
+            personalities: voice.VoiceTag?.VoicePersonalities?.filter(
+              (value): value is string => value.trim().length > 0,
+            ),
+          }))
+          .filter((voice) => voice.id.length > 0)
+      : [];
+  } finally {
+    await release();
  }
-  await assertOkOrThrowProviderError(response, "Microsoft voices API error");
-  const voices = (await response.json()) as MicrosoftVoiceListEntry[];
-  return Array.isArray(voices)
-    ? voices
-        .map((voice) => ({
-          id: voice.ShortName?.trim() ?? "",
-          name: trimToUndefined(voice.FriendlyName) ?? trimToUndefined(voice.ShortName),
-          category: voice.VoiceTag?.ContentCategories?.find((value) => value.trim().length > 0),
-          description: formatMicrosoftVoiceDescription(voice),
-          locale: trimToUndefined(voice.Locale),
-          gender: trimToUndefined(voice.Gender),
-          personalities: voice.VoiceTag?.VoicePersonalities?.filter(
-            (value): value is string => value.trim().length > 0,
-          ),
-        }))
-        .filter((voice) => voice.id.length > 0)
-    : [];
 }

 export function buildMicrosoftSpeechProvider(): SpeechProviderPlugin {
--- a/extensions/minimax/tts.ts
+++ b/extensions/minimax/tts.ts
@@ -1,4 +1,8 @@
 import { assertOkOrThrowProviderError } from "openclaw/plugin-sdk/provider-http";
+import {
+  fetchWithSsrFGuard,
+  ssrfPolicyFromHttpBaseUrlAllowedHostname,
+} from "openclaw/plugin-sdk/ssrf-runtime";

 export const DEFAULT_MINIMAX_TTS_BASE_URL = "https://api.minimax.io";

@@ -51,38 +55,47 @@ export async function minimaxTTS(params: {
  const timeout = setTimeout(() => controller.abort(), timeoutMs);

  try {
-    const response = await fetch(`${baseUrl}/v1/t2a_v2`, {
-      method: "POST",
-      headers: {
-        Authorization: `Bearer ${apiKey}`,
-        "Content-Type": "application/json",
+    const { response, release } = await fetchWithSsrFGuard({
+      url: `${baseUrl}/v1/t2a_v2`,
+      init: {
+        method: "POST",
+        headers: {
+          Authorization: `Bearer ${apiKey}`,
+          "Content-Type": "application/json",
+        },
+        body: JSON.stringify({
+          model,
+          text,
+          voice_setting: {
+            voice_id: voiceId,
+            speed,
+            vol,
+            pitch,
+          },
+          audio_setting: {
+            format,
+            sample_rate: sampleRate,
+          },
+        }),
+        signal: controller.signal,
      },
-      body: JSON.stringify({
-        model,
-        text,
-        voice_setting: {
-          voice_id: voiceId,
-          speed,
-          vol,
-          pitch,
-        },
-        audio_setting: {
-          format,
-          sample_rate: sampleRate,
-        },
-      }),
-      signal: controller.signal,
+      timeoutMs,
+      policy: ssrfPolicyFromHttpBaseUrlAllowedHostname(baseUrl),
+      auditContext: "minimax.tts",
    });
+    try {
+      await assertOkOrThrowProviderError(response, "MiniMax TTS API error");

-    await assertOkOrThrowProviderError(response, "MiniMax TTS API error");
+      const body = (await response.json()) as { data?: { audio?: string } };
+      const hexAudio = body?.data?.audio;
+      if (!hexAudio) {
+        throw new Error("MiniMax TTS API returned no audio data");
+      }

-    const body = (await response.json()) as { data?: { audio?: string } };
-    const hexAudio = body?.data?.audio;
-    if (!hexAudio) {
-      throw new Error("MiniMax TTS API returned no audio data");
+      return Buffer.from(hexAudio, "hex");
+    } finally {
+      await release();
    }
-
-    return Buffer.from(hexAudio, "hex");
  } finally {
    clearTimeout(timeout);
  }