console.log("Initializing Kokoro TTS Worker"); import { KokoroTTS, TextSplitterStream, } from "https://cdn.jsdelivr.net/npm/kokoro-js@1.2.0/+esm"; async function detectWebGPU() { try { const adapter = await navigator.gpu.requestAdapter(); return !!adapter; } catch (e) { return false; } } function blobToBase64(blob) { return new Promise((resolve, _) => { const reader = new FileReader(); reader.onloadend = () => resolve(reader.result); reader.readAsDataURL(blob); }); } // Device detection const device = (await detectWebGPU()) ? "webgpu" : "wasm"; self.postMessage({ status: "device", device }); console.log(`Detected device: ${device}`); // Load the model const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX"; const tts = await KokoroTTS.from_pretrained(model_id, { dtype: device === "wasm" ? "q8" : "fp32", device, progressCallback: (progress) => { self.postMessage({ status: "progress", progress }); console.log(`Loading progress: ${progress * 100}%`); }, }); const splitter = new TextSplitterStream(); const stream = tts.stream(splitter); let index = 0; // Listen for messages from the main thread self.addEventListener("message", async (e) => { const { text, voice, index } = e.data; console.log( `Generating speech for text: "${text}" with voice: ${voice}, index: ${index}` ); // Push the text to the splitter splitter.push(text); // Process the stream and include the correct index for await (const { text: processedText, phonemes, audio } of stream) { console.log({ processedText, phonemes }); const blob = audio.toBlob(); const base64Audio = await blobToBase64(blob); self.postMessage({ status: "complete", audio: base64Audio, text: processedText, phonemes, index, // Include the index from the original message }); break; // Stop processing after the first chunk for this message } }); console.log("Kokoro TTS model loaded successfully"); self.postMessage({ status: "ready", voices: tts.voices, device }); console.log("Available voices:", tts.voices);