neuroread/public/workers/kokoro-worker.js
2025-05-05 11:07:55 -04:00

81 lines
2.1 KiB
JavaScript

// Startup log. Static `import` declarations are hoisted in ES modules, so the
// dependency imported below has already been loaded by the time this line runs.
console.log("Initializing Kokoro TTS Worker");
import {
KokoroTTS,
TextSplitterStream,
} from "https://cdn.jsdelivr.net/npm/kokoro-js@1.2.0/+esm";
/**
 * Report whether a WebGPU adapter can be obtained in this context.
 *
 * Any failure while requesting the adapter (including `navigator.gpu` being
 * absent) is treated as "WebGPU not available" rather than propagated.
 *
 * @returns {Promise<boolean>} `true` when `requestAdapter()` yields a truthy
 *   adapter, `false` otherwise.
 */
async function detectWebGPU() {
  let gpuAdapter = null;
  try {
    gpuAdapter = await navigator.gpu.requestAdapter();
  } catch {
    // Deliberate best-effort probe: fall through with no adapter.
    gpuAdapter = null;
  }
  return Boolean(gpuAdapter);
}
/**
 * Read a Blob through `FileReader.readAsDataURL` and return the result.
 *
 * The promise resolves on the reader's `load` event and rejects on its
 * `error` event, so a failed read surfaces as a rejection instead of
 * silently resolving with `null` (which is what listening on `loadend`
 * alone would do, since `loadend` fires after both success and failure).
 *
 * @param {Blob} blob - The blob to read.
 * @returns {Promise<string>} The reader's result (a `data:` URL string).
 */
function blobToBase64(blob) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => resolve(reader.result);
    reader.onerror = () =>
      reject(reader.error ?? new Error("Failed to read blob as data URL"));
    reader.readAsDataURL(blob);
  });
}
// Device detection: use WebGPU when an adapter is available, otherwise WASM.
const device = (await detectWebGPU()) ? "webgpu" : "wasm";
self.postMessage({ status: "device", device });
console.log(`Detected device: ${device}`);

// Load the model. The WASM path uses the quantized ("q8") weights; WebGPU
// uses full precision ("fp32").
const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
const tts = await KokoroTTS.from_pretrained(model_id, {
  dtype: device === "wasm" ? "q8" : "fp32",
  device,
  // NOTE(review): kokoro-js appears to document this option as
  // `progress_callback` (receiving a progress-info object, not a number).
  // If so, this callback never fires — confirm against the library docs
  // and the main thread's handling of "progress" messages before renaming.
  progressCallback: (progress) => {
    self.postMessage({ status: "progress", progress });
    console.log(`Loading progress: ${progress * 100}%`);
  },
});

// Module-level text splitter and the synthesis stream that consumes it.
const splitter = new TextSplitterStream();
const stream = tts.stream(splitter);
// Listen for messages from the main thread.
//
// Each message carries `{ text, voice, index }`. The handler synthesizes the
// first chunk produced for `text` and replies with a single
// `{ status: "complete", audio, text, phonemes, index }` message, where
// `audio` is the data URL produced by `blobToBase64`. On failure it replies
// with `{ status: "error", error, index }` instead.
self.addEventListener("message", async (e) => {
  const { text, voice, index } = e.data;
  console.log(
    `Generating speech for text: "${text}" with voice: ${voice}, index: ${index}`
  );

  try {
    // Use a splitter/stream pair owned by this request. Leaving a
    // `for await` loop early calls `return()` on the async iterator, so a
    // stream shared across messages would be finished after the first
    // request; a shared splitter could also carry leftover text from one
    // request into the next.
    const requestSplitter = new TextSplitterStream();
    requestSplitter.push(text);
    requestSplitter.close(); // Signal that no more text will be added.

    // Pass the requested voice through; `undefined` falls back to the
    // stream's own default.
    const requestStream = tts.stream(requestSplitter, { voice });

    for await (const { text: processedText, phonemes, audio } of requestStream) {
      console.log({ processedText, phonemes });
      const blob = audio.toBlob();
      const base64Audio = await blobToBase64(blob);
      self.postMessage({
        status: "complete",
        audio: base64Audio,
        text: processedText,
        phonemes,
        index, // Echo the index from the original message.
      });
      // One reply per message: stop after the first chunk.
      // NOTE(review): any further chunks of `text` are discarded —
      // presumably the caller sends one sentence per message; confirm.
      break;
    }
  } catch (err) {
    // Report the failure so the main thread is not left waiting on `index`.
    console.error(`Speech generation failed for index ${index}:`, err);
    self.postMessage({
      status: "error",
      error: String(err?.message ?? err),
      index,
    });
  }
});
// Model is loaded and the message listener is registered: tell the main
// thread the worker is ready and which voices/device it offers.
console.log("Kokoro TTS model loaded successfully");
const { voices } = tts;
self.postMessage({ status: "ready", voices, device });
console.log("Available voices:", voices);