From 3eda27163553136c7b003cc3adfd6aa418fbe955 Mon Sep 17 00:00:00 2001 From: Jack Merrill Date: Thu, 24 Apr 2025 17:57:19 -0400 Subject: [PATCH] TTS QoL --- bun.lockb | Bin 222392 -> 222392 bytes components/TTSProvider.tsx | 16 ++++++++++++++-- public/workers/kokoro-worker.js | 16 +++++++++++++++- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/bun.lockb b/bun.lockb index 84a569d43abc9596cdd6ff8d6b3c31ac9febdb07..794f9fb8fae5beabeddab2c36791a2889fc89008 100755 GIT binary patch delta 879 zcmWO2Ur1AN6u|L&&t@vM{<+QEoinH*6-+JqN6thUw3jT)tjv{_P(IY0q!8i% zZ9o5PZDudktR7Z(Jro2D^`(5W9)q&Ch%);FMSU-PxaZz`&bhyH($lu|w5|Pyo|2tT zwaY0R%@(I@vjiemO&fwj7-JSqiz9(3Vi-XW1`t9o`p}Jjgb_q1I?x6`n$d(t)T03# z@J#3dpJ=MsCF=qobxoVY2YkXSOkxTdJi$|>U?Pof^q>lrs6j2NaR^6n7zeQr6*z!B z*pJ=Vq;ct*dgGGqYPBKu^~0=S1fz%|hWlhbKoS#ph)24rGo;4r;JXuc*svH&FhkC3 zOfu$-dTvO*x17o6dHjY2d__4NaG?OJ)L%o4f(0t)@f|<#3%~JGmANG^_Z{IZ-l__> z7)P538*vF2;71EC;yhY$87FZDXVHLWoWlv6#wi>_J&xlRx>Uw3y}7gODb834jy>v)%fN@^!KnJdynIidOZj9y24!y%W%dV(`tHJqd$^o?F6Y3_Etzslrrx`{jJV8( zfWOVDTxQvq4VvYEJ`^)(S{w?aNa!^!i4-O=g>gjCj{yu~2*ZdXhCYOG4?S?B9UbU` z7e4I93!x62qN!@D)Pz3hH0>i+@d>Yx#{w3S!81HZ7CA)FkE1w_6KKFmoI)c`qX}nl z7R_kEIn-mn#?oo(ja3e*y*4R#r5M39(wM<4CNPO9jA0xlD1{kYuoWhQs<%nAahb}m zv7qK{QdYA}r$^}eEBiOBVZCsMQk262tNLpbn`Mog-?5G#_=#Wmt?KMjY+NCHi+8HQ zF1FT(g!geC7vRQ4w4ogybl?&$qZ3`Y0xx{%##LOyb@&lLQ0471U`fzg5-ZGV8~b(? zVTWq1kS6uALP{F{k*?8$4gx(4*no}Lgw1$Q-aH;-1X08ghe8OwxUH%j(qb&;na_;w zi|TVoZQU&@^k9OFNlf7p5=bJ2X{0fOuv&G<%{_x$8Nx6|5Je1eD2!qZyHKSfm9o=P zOL&Y586Hm)AI2Sov-wK-Q&woFu(FE>xJFV)!{2)eyqXTamT;qj{nb5c!C_UW7$ GbN>PTmy^f< diff --git a/components/TTSProvider.tsx b/components/TTSProvider.tsx index 4d3654b..d04e872 100644 --- a/components/TTSProvider.tsx +++ b/components/TTSProvider.tsx @@ -8,6 +8,7 @@ import React, { ReactNode, } from "react"; import removeMarkdown from "remove-markdown"; +import { toast } from "sonner"; // More robust sentence splitter using Intl.Segmenter for better accuracy. function splitIntoSentences(text: string): string[] { @@ -49,7 +50,9 @@ export const TTSProvider = ({ }) => { // Combine pages and split into sentences. const fullText = pages.join("\n"); - const sentences = splitIntoSentences(fullText); + const sentences = splitIntoSentences(fullText).filter( + (sentence) => sentence.trim() !== "\\n" && sentence.trim() !== "" + ); const [currentSentence, setCurrentSentence] = useState(0); const [ttsBuffer, setTtsBuffer] = useState<(string | null)[]>( @@ -95,6 +98,7 @@ export const TTSProvider = ({ localStorage.setItem(key, e.data.audio); resolve(e.data.audio); } else if (e.data.status === "error") { + toast.error(`Error generating audio: ${e.data.error}`); reject(e.data.error); } }, @@ -163,6 +167,7 @@ export const TTSProvider = ({ const end = Math.min(sentences.length, currentSentence + 3); for (let i = currentSentence; i < end; i++) { if (!newBuffer[i]) { + console.log("Preloading TTS for sentence:", i, sentences[i]); newBuffer[i] = await generateTTSForIndex( removeMarkdown(sentences[i]), i @@ -191,7 +196,11 @@ export const TTSProvider = ({ } if (audioRef.current) { audioRef.current.src = audioUrl; - await audioRef.current.play(); + await new Promise((res) => { + audioRef.current!.play(); + + audioRef.current!.onended = () => res(true); + }); } }; @@ -202,10 +211,13 @@ export const TTSProvider = ({ const playInOrder = async (index: number) => { if (index < 0 || index >= sentences.length) return; + console.log("Playing in order from index:", index); setCurrentSentence(index); for (let i = index; i < sentences.length; i++) { + console.log("Playing sentence:", i, sentences[i]); await playSentence(i); if (i < sentences.length - 1) { + console.log("Waiting for next sentence..."); await new Promise((resolve) => setTimeout(resolve, 1000)); } } diff --git a/public/workers/kokoro-worker.js b/public/workers/kokoro-worker.js index 3b95646..40a088b 100644 --- a/public/workers/kokoro-worker.js +++ b/public/workers/kokoro-worker.js @@ -9,6 +9,15 @@ async function detectWebGPU() { return false; } } + +function blobToBase64(blob) { + return new Promise((resolve, _) => { + const reader = new FileReader(); + reader.onloadend = () => resolve(reader.result); + reader.readAsDataURL(blob); + }); +} + // Device detection const device = (await detectWebGPU()) ? "webgpu" : "wasm"; self.postMessage({ status: "device", device }); @@ -20,6 +29,10 @@ const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX"; const tts = await KokoroTTS.from_pretrained(model_id, { dtype: device === "wasm" ? "q8" : "fp32", device, + progressCallback: (progress) => { + self.postMessage({ status: "progress", progress }); + console.log(`Loading progress: ${progress * 100}%`); + }, }); console.log("Kokoro TTS model loaded successfully"); @@ -34,13 +47,14 @@ self.addEventListener("message", async (e) => { try { // Generate speech + console.log(`Generating speech for text: "${text}" with voice: ${voice}`); const audio = await tts.generate(text, { voice }); // Send the audio file back to the main thread const blob = audio.toBlob(); self.postMessage({ status: "complete", - audio: URL.createObjectURL(blob), + audio: await blobToBase64(blob), text, }); } catch (error) {