// neuroread/public/workers/kokoro-worker.js

// Startup marker for the worker. Static imports are hoisted, so the import
// below is resolved before this statement runs even though it appears later
// in the file.
console.log("Initializing Kokoro TTS Worker");

import {
  KokoroTTS,
  TextSplitterStream,
} from "https://cdn.jsdelivr.net/npm/kokoro-js@1.2.0/+esm";
/**
 * Probe whether a WebGPU adapter can be obtained in this context.
 *
 * Any failure along the way (no `navigator.gpu`, a rejected adapter request)
 * is treated as "not available" rather than propagated.
 *
 * @returns {Promise<boolean>} `true` when `requestAdapter()` yields a truthy
 *   adapter, `false` otherwise.
 */
async function detectWebGPU() {
  let hasAdapter = false;
  try {
    const gpuAdapter = await navigator.gpu.requestAdapter();
    hasAdapter = Boolean(gpuAdapter);
  } catch (e) {
    // Deliberate best-effort probe: a throw just means WebGPU is unusable.
    hasAdapter = false;
  }
  return hasAdapter;
}

/**
 * Encode a Blob as a `data:` URL using FileReader.
 *
 * @param {Blob} blob - Binary data to encode.
 * @returns {Promise<string>} Resolves with the data URL produced by
 *   `readAsDataURL` once the read succeeds. Rejects with the reader's error
 *   when the read fails, instead of silently resolving with `null`.
 */
function blobToBase64(blob) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // `load` only fires on success; `loadend` also fires after a failure, at
    // which point `reader.result` is null.
    reader.onload = () => resolve(reader.result);
    reader.onerror = () =>
      reject(reader.error ?? new Error("Failed to read blob"));
    reader.readAsDataURL(blob);
  });
}

// Device detection: use WebGPU when an adapter is available, otherwise fall
// back to WASM. The choice is reported to the main thread right away.
const device = (await detectWebGPU()) ? "webgpu" : "wasm";
self.postMessage({ status: "device", device });

console.log(`Detected device: ${device}`);

// Load the model
const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
const tts = await KokoroTTS.from_pretrained(model_id, {
  // q8 weights on the WASM backend, fp32 on WebGPU.
  dtype: device === "wasm" ? "q8" : "fp32",
  device,
  // The loader reads the snake_case `progress_callback` option; the previous
  // camelCase `progressCallback` key was ignored, so no progress was reported.
  progress_callback: (progress) => {
    self.postMessage({ status: "progress", progress });
    // NOTE(review): the callback appears to receive a progress-info object
    // whose `progress` field is already a percentage — confirm against the
    // library docs. A plain 0..1 number is still handled as before.
    const percent =
      typeof progress === "number" ? progress * 100 : progress?.progress;
    if (typeof percent === "number") {
      console.log(`Loading progress: ${percent}%`);
    }
  },
});

// Shared splitter/stream pair consumed by the message handler below.
const splitter = new TextSplitterStream();
const stream = tts.stream(splitter);

// Listen for messages from the main thread.
//
// Request shape:  { text, voice, index }
// Reply (success): { status: "complete", audio, text, phonemes, index }
// Reply (failure): { status: "error", error, index }
//
// Exactly one reply is posted per request, carrying the request's `index`.
self.addEventListener("message", async (e) => {
  const { text, voice, index } = e.data;

  console.log(
    `Generating speech for text: "${text}" with voice: ${voice}, index: ${index}`
  );

  try {
    if (typeof text !== "string" || text.trim() === "") {
      throw new Error("No text provided for speech generation");
    }

    // Use a dedicated splitter/stream per request. Leaving a `for await` loop
    // early calls `return()` on the iterator, which finishes an async
    // generator; reusing one shared stream therefore only ever served the
    // first request. A per-request stream also lets the requested voice be
    // applied (it is an option of `tts.stream`, fixed when the stream is made).
    const requestSplitter = new TextSplitterStream();
    const requestStream = tts.stream(requestSplitter, { voice });

    requestSplitter.push(text);
    // Closing flushes buffered text and ends the stream, so the loop below
    // terminates instead of waiting for more input.
    requestSplitter.close();

    for await (const { text: processedText, phonemes, audio } of requestStream) {
      console.log({ processedText, phonemes });

      const blob = audio.toBlob();
      const base64Audio = await blobToBase64(blob);

      self.postMessage({
        status: "complete",
        audio: base64Audio,
        text: processedText,
        phonemes,
        index, // Include the index from the original message
      });

      return; // Only the first chunk is reported for this message
    }

    // The stream ended without yielding anything.
    throw new Error("No speech was generated for the given text");
  } catch (err) {
    console.error(`Speech generation failed for index ${index}:`, err);
    self.postMessage({
      status: "error",
      error: err?.message ?? String(err),
      index,
    });
  }
});

// Model is loaded: log it, tell the main thread which voices and device are
// in use, then log the voice list for debugging.
console.log("Kokoro TTS model loaded successfully");

const availableVoices = tts.voices;
self.postMessage({
  status: "ready",
  voices: availableVoices,
  device,
});

console.log("Available voices:", availableVoices);