This commit is contained in:
Jack Merrill 2025-04-24 17:57:19 -04:00
parent 31e0848c95
commit 3eda271635
No known key found for this signature in database
GPG Key ID: FD574AFF96E99636
3 changed files with 29 additions and 3 deletions

BIN
bun.lockb

Binary file not shown.

View File

@ -8,6 +8,7 @@ import React, {
ReactNode,
} from "react";
import removeMarkdown from "remove-markdown";
import { toast } from "sonner";
// More robust sentence splitter using Intl.Segmenter for better accuracy.
function splitIntoSentences(text: string): string[] {
@ -49,7 +50,9 @@ export const TTSProvider = ({
}) => {
// Combine pages and split into sentences.
const fullText = pages.join("\n");
const sentences = splitIntoSentences(fullText);
const sentences = splitIntoSentences(fullText).filter(
(sentence) => sentence.trim() !== "\\n" && sentence.trim() !== ""
);
const [currentSentence, setCurrentSentence] = useState(0);
const [ttsBuffer, setTtsBuffer] = useState<(string | null)[]>(
@ -95,6 +98,7 @@ export const TTSProvider = ({
localStorage.setItem(key, e.data.audio);
resolve(e.data.audio);
} else if (e.data.status === "error") {
toast.error(`Error generating audio: ${e.data.error}`);
reject(e.data.error);
}
},
@ -163,6 +167,7 @@ export const TTSProvider = ({
const end = Math.min(sentences.length, currentSentence + 3);
for (let i = currentSentence; i < end; i++) {
if (!newBuffer[i]) {
console.log("Preloading TTS for sentence:", i, sentences[i]);
newBuffer[i] = await generateTTSForIndex(
removeMarkdown(sentences[i]),
i
@ -191,7 +196,11 @@ export const TTSProvider = ({
}
if (audioRef.current) {
audioRef.current.src = audioUrl;
await audioRef.current.play();
await new Promise((res) => {
audioRef.current!.play();
audioRef.current!.onended = () => res(true);
});
}
};
@ -202,10 +211,13 @@ export const TTSProvider = ({
const playInOrder = async (index: number) => {
if (index < 0 || index >= sentences.length) return;
console.log("Playing in order from index:", index);
setCurrentSentence(index);
for (let i = index; i < sentences.length; i++) {
console.log("Playing sentence:", i, sentences[i]);
await playSentence(i);
if (i < sentences.length - 1) {
console.log("Waiting for next sentence...");
await new Promise((resolve) => setTimeout(resolve, 1000));
}
}

View File

@ -9,6 +9,15 @@ async function detectWebGPU() {
return false;
}
}
/**
 * Encode a Blob as a base64 `data:` URL using FileReader.
 *
 * @param blob - The Blob to read.
 * @returns A promise that resolves with `reader.result` once the read
 *   succeeds, and rejects with `reader.error` if the read fails.
 */
function blobToBase64(blob) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    // Resolve from `onload`, which fires only after a successful read.
    // `onloadend` also fires after a failed read, where `reader.result` is
    // null, which would hand callers a null "data URL" instead of an error.
    reader.onload = () => resolve(reader.result);
    reader.onerror = () => reject(reader.error);
    reader.readAsDataURL(blob);
  });
}
// Choose the inference backend: WebGPU when detection succeeds, otherwise WASM.
const hasWebGPU = await detectWebGPU();
const device = hasWebGPU ? "webgpu" : "wasm";
// Report the selected backend via a "device" status message.
self.postMessage({ status: "device", device: device });
@ -20,6 +29,10 @@ const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
// Load the Kokoro model once at worker start-up: q8 weights are requested on
// the WASM backend and fp32 weights otherwise.
const tts = await KokoroTTS.from_pretrained(model_id, {
  dtype: device === "wasm" ? "q8" : "fp32",
  device,
  // kokoro-js reads the snake_case `progress_callback` option; a camelCase
  // `progressCallback` key is ignored, so loading progress was never reported.
  progress_callback: (progress) => {
    // Forward the loader's progress event unchanged as a "progress" message.
    self.postMessage({ status: "progress", progress });
    // The loader passes an event object rather than a bare number. Only log
    // events that carry a numeric `progress` field (per the Transformers.js
    // docs this is already a percentage — confirm against the pinned version).
    if (progress && typeof progress.progress === "number") {
      console.log(`Loading progress: ${progress.progress}%`);
    }
  },
});
console.log("Kokoro TTS model loaded successfully");
@ -34,13 +47,14 @@ self.addEventListener("message", async (e) => {
try {
// Generate speech
console.log(`Generating speech for text: "${text}" with voice: ${voice}`);
const audio = await tts.generate(text, { voice });
// Send the audio file back to the main thread
const blob = audio.toBlob();
self.postMessage({
status: "complete",
audio: URL.createObjectURL(blob),
audio: await blobToBase64(blob),
text,
});
} catch (error) {