TTS QoL
This commit is contained in:
parent
31e0848c95
commit
3eda271635
@ -8,6 +8,7 @@ import React, {
|
||||
ReactNode,
|
||||
} from "react";
|
||||
import removeMarkdown from "remove-markdown";
|
||||
import { toast } from "sonner";
|
||||
|
||||
// More robust sentence splitter using Intl.Segmenter for better accuracy.
|
||||
function splitIntoSentences(text: string): string[] {
|
||||
@ -49,7 +50,9 @@ export const TTSProvider = ({
|
||||
}) => {
|
||||
// Combine pages and split into sentences.
|
||||
const fullText = pages.join("\n");
|
||||
const sentences = splitIntoSentences(fullText);
|
||||
const sentences = splitIntoSentences(fullText).filter(
|
||||
(sentence) => sentence.trim() !== "\\n" && sentence.trim() !== ""
|
||||
);
|
||||
|
||||
const [currentSentence, setCurrentSentence] = useState(0);
|
||||
const [ttsBuffer, setTtsBuffer] = useState<(string | null)[]>(
|
||||
@ -95,6 +98,7 @@ export const TTSProvider = ({
|
||||
localStorage.setItem(key, e.data.audio);
|
||||
resolve(e.data.audio);
|
||||
} else if (e.data.status === "error") {
|
||||
toast.error(`Error generating audio: ${e.data.error}`);
|
||||
reject(e.data.error);
|
||||
}
|
||||
},
|
||||
@ -163,6 +167,7 @@ export const TTSProvider = ({
|
||||
const end = Math.min(sentences.length, currentSentence + 3);
|
||||
for (let i = currentSentence; i < end; i++) {
|
||||
if (!newBuffer[i]) {
|
||||
console.log("Preloading TTS for sentence:", i, sentences[i]);
|
||||
newBuffer[i] = await generateTTSForIndex(
|
||||
removeMarkdown(sentences[i]),
|
||||
i
|
||||
@ -191,7 +196,11 @@ export const TTSProvider = ({
|
||||
}
|
||||
if (audioRef.current) {
|
||||
audioRef.current.src = audioUrl;
|
||||
await audioRef.current.play();
|
||||
await new Promise((res) => {
|
||||
audioRef.current!.play();
|
||||
|
||||
audioRef.current!.onended = () => res(true);
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@ -202,10 +211,13 @@ export const TTSProvider = ({
|
||||
|
||||
const playInOrder = async (index: number) => {
|
||||
if (index < 0 || index >= sentences.length) return;
|
||||
console.log("Playing in order from index:", index);
|
||||
setCurrentSentence(index);
|
||||
for (let i = index; i < sentences.length; i++) {
|
||||
console.log("Playing sentence:", i, sentences[i]);
|
||||
await playSentence(i);
|
||||
if (i < sentences.length - 1) {
|
||||
console.log("Waiting for next sentence...");
|
||||
await new Promise((resolve) => setTimeout(resolve, 1000));
|
||||
}
|
||||
}
|
||||
|
@ -9,6 +9,15 @@ async function detectWebGPU() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Encodes a Blob as a `data:` URL string via `FileReader.readAsDataURL`.
 *
 * @param {Blob} blob - Binary data to encode.
 * @returns {Promise<string>} Resolves with the data URL once the read
 *   succeeds. Rejects with an Error if the read fails or is aborted, so the
 *   caller never receives a `null` result disguised as success.
 */
function blobToBase64(blob) {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();

    // `loadend` fires after failed and aborted reads as well (with
    // `reader.result === null`), so success and failure are wired to their
    // own events instead of resolving unconditionally from `onloadend`.
    reader.onload = () => resolve(reader.result);
    reader.onerror = () =>
      reject(reader.error ?? new Error("Failed to read blob as data URL"));
    reader.onabort = () => reject(new Error("Reading blob was aborted"));

    reader.readAsDataURL(blob);
  });
}
|
||||
|
||||
// Device detection
|
||||
const device = (await detectWebGPU()) ? "webgpu" : "wasm";
|
||||
self.postMessage({ status: "device", device });
|
||||
@ -20,6 +29,10 @@ const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
|
||||
// Load the Kokoro model: `q8` dtype when running on the "wasm" device,
// `fp32` otherwise.
const tts = await KokoroTTS.from_pretrained(model_id, {
  dtype: device === "wasm" ? "q8" : "fp32",
  device,
  // kokoro-js (and the transformers.js loaders it forwards to) read the
  // snake_case `progress_callback` option; a camelCase `progressCallback`
  // key is not recognised, so the callback would never be invoked.
  progress_callback: (progress) => {
    // Forward the raw progress payload to the main thread unchanged.
    self.postMessage({ status: "progress", progress });

    // A bare number is treated as a 0..1 fraction (the original behaviour).
    // NOTE(review): transformers.js passes an info object whose numeric
    // `progress` field appears to already be a percentage — confirm.
    const percent =
      typeof progress === "number" ? progress * 100 : progress?.progress;
    if (typeof percent === "number") {
      console.log(`Loading progress: ${percent}%`);
    }
  },
});
|
||||
|
||||
console.log("Kokoro TTS model loaded successfully");
|
||||
@ -34,13 +47,14 @@ self.addEventListener("message", async (e) => {
|
||||
|
||||
try {
|
||||
// Generate speech
|
||||
console.log(`Generating speech for text: "${text}" with voice: ${voice}`);
|
||||
const audio = await tts.generate(text, { voice });
|
||||
|
||||
// Send the audio file back to the main thread
|
||||
const blob = audio.toBlob();
|
||||
self.postMessage({
|
||||
status: "complete",
|
||||
audio: URL.createObjectURL(blob),
|
||||
audio: await blobToBase64(blob),
|
||||
text,
|
||||
});
|
||||
} catch (error) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user