This commit is contained in:
Jack Merrill 2025-04-24 17:57:19 -04:00
parent 31e0848c95
commit 3eda271635
No known key found for this signature in database
GPG Key ID: FD574AFF96E99636
3 changed files with 29 additions and 3 deletions

BIN
bun.lockb

Binary file not shown.

View File

@ -8,6 +8,7 @@ import React, {
ReactNode, ReactNode,
} from "react"; } from "react";
import removeMarkdown from "remove-markdown"; import removeMarkdown from "remove-markdown";
import { toast } from "sonner";
// More robust sentence splitter using Intl.Segmenter for better accuracy. // More robust sentence splitter using Intl.Segmenter for better accuracy.
function splitIntoSentences(text: string): string[] { function splitIntoSentences(text: string): string[] {
@ -49,7 +50,9 @@ export const TTSProvider = ({
}) => { }) => {
// Combine pages and split into sentences. // Combine pages and split into sentences.
const fullText = pages.join("\n"); const fullText = pages.join("\n");
const sentences = splitIntoSentences(fullText); const sentences = splitIntoSentences(fullText).filter(
(sentence) => sentence.trim() !== "\\n" && sentence.trim() !== ""
);
const [currentSentence, setCurrentSentence] = useState(0); const [currentSentence, setCurrentSentence] = useState(0);
const [ttsBuffer, setTtsBuffer] = useState<(string | null)[]>( const [ttsBuffer, setTtsBuffer] = useState<(string | null)[]>(
@ -95,6 +98,7 @@ export const TTSProvider = ({
localStorage.setItem(key, e.data.audio); localStorage.setItem(key, e.data.audio);
resolve(e.data.audio); resolve(e.data.audio);
} else if (e.data.status === "error") { } else if (e.data.status === "error") {
toast.error(`Error generating audio: ${e.data.error}`);
reject(e.data.error); reject(e.data.error);
} }
}, },
@ -163,6 +167,7 @@ export const TTSProvider = ({
const end = Math.min(sentences.length, currentSentence + 3); const end = Math.min(sentences.length, currentSentence + 3);
for (let i = currentSentence; i < end; i++) { for (let i = currentSentence; i < end; i++) {
if (!newBuffer[i]) { if (!newBuffer[i]) {
console.log("Preloading TTS for sentence:", i, sentences[i]);
newBuffer[i] = await generateTTSForIndex( newBuffer[i] = await generateTTSForIndex(
removeMarkdown(sentences[i]), removeMarkdown(sentences[i]),
i i
@ -191,7 +196,11 @@ export const TTSProvider = ({
} }
if (audioRef.current) { if (audioRef.current) {
audioRef.current.src = audioUrl; audioRef.current.src = audioUrl;
await audioRef.current.play(); await new Promise((res) => {
audioRef.current!.play();
audioRef.current!.onended = () => res(true);
});
} }
}; };
@ -202,10 +211,13 @@ export const TTSProvider = ({
const playInOrder = async (index: number) => { const playInOrder = async (index: number) => {
if (index < 0 || index >= sentences.length) return; if (index < 0 || index >= sentences.length) return;
console.log("Playing in order from index:", index);
setCurrentSentence(index); setCurrentSentence(index);
for (let i = index; i < sentences.length; i++) { for (let i = index; i < sentences.length; i++) {
console.log("Playing sentence:", i, sentences[i]);
await playSentence(i); await playSentence(i);
if (i < sentences.length - 1) { if (i < sentences.length - 1) {
console.log("Waiting for next sentence...");
await new Promise((resolve) => setTimeout(resolve, 1000)); await new Promise((resolve) => setTimeout(resolve, 1000));
} }
} }

View File

@ -9,6 +9,15 @@ async function detectWebGPU() {
return false; return false;
} }
} }
function blobToBase64(blob) {
return new Promise((resolve, _) => {
const reader = new FileReader();
reader.onloadend = () => resolve(reader.result);
reader.readAsDataURL(blob);
});
}
// Device detection // Device detection
const device = (await detectWebGPU()) ? "webgpu" : "wasm"; const device = (await detectWebGPU()) ? "webgpu" : "wasm";
self.postMessage({ status: "device", device }); self.postMessage({ status: "device", device });
@ -20,6 +29,10 @@ const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
const tts = await KokoroTTS.from_pretrained(model_id, { const tts = await KokoroTTS.from_pretrained(model_id, {
dtype: device === "wasm" ? "q8" : "fp32", dtype: device === "wasm" ? "q8" : "fp32",
device, device,
progressCallback: (progress) => {
self.postMessage({ status: "progress", progress });
console.log(`Loading progress: ${progress * 100}%`);
},
}); });
console.log("Kokoro TTS model loaded successfully"); console.log("Kokoro TTS model loaded successfully");
@ -34,13 +47,14 @@ self.addEventListener("message", async (e) => {
try { try {
// Generate speech // Generate speech
console.log(`Generating speech for text: "${text}" with voice: ${voice}`);
const audio = await tts.generate(text, { voice }); const audio = await tts.generate(text, { voice });
// Send the audio file back to the main thread // Send the audio file back to the main thread
const blob = audio.toBlob(); const blob = audio.toBlob();
self.postMessage({ self.postMessage({
status: "complete", status: "complete",
audio: URL.createObjectURL(blob), audio: await blobToBase64(blob),
text, text,
}); });
} catch (error) { } catch (error) {