TTS QoL
parent 31e0848c95
commit 3eda271635
@@ -8,6 +8,7 @@ import React, {
   ReactNode,
 } from "react";
 import removeMarkdown from "remove-markdown";
+import { toast } from "sonner";
 
 // More robust sentence splitter using Intl.Segmenter for better accuracy.
 function splitIntoSentences(text: string): string[] {
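Note: the body of splitIntoSentences sits outside this hunk. A minimal sketch of the approach the comment describes, assuming an English locale and a regex fallback for engines without Intl.Segmenter (neither detail is taken from this commit):

function splitIntoSentences(text: string): string[] {
  // Intl.Segmenter gives locale-aware sentence boundaries where supported.
  if (typeof Intl !== "undefined" && "Segmenter" in Intl) {
    const segmenter = new Intl.Segmenter("en", { granularity: "sentence" });
    return Array.from(segmenter.segment(text), (s) => s.segment);
  }
  // Assumed fallback: naive split on sentence-ending punctuation.
  return text.split(/(?<=[.!?])\s+/);
}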
@@ -49,7 +50,9 @@ export const TTSProvider = ({
 }) => {
   // Combine pages and split into sentences.
   const fullText = pages.join("\n");
-  const sentences = splitIntoSentences(fullText);
+  const sentences = splitIntoSentences(fullText).filter(
+    (sentence) => sentence.trim() !== "\\n" && sentence.trim() !== ""
+  );
 
   const [currentSentence, setCurrentSentence] = useState(0);
   const [ttsBuffer, setTtsBuffer] = useState<(string | null)[]>(
@@ -95,6 +98,7 @@ export const TTSProvider = ({
           localStorage.setItem(key, e.data.audio);
           resolve(e.data.audio);
         } else if (e.data.status === "error") {
+          toast.error(`Error generating audio: ${e.data.error}`);
           reject(e.data.error);
         }
       },
@@ -163,6 +167,7 @@ export const TTSProvider = ({
     const end = Math.min(sentences.length, currentSentence + 3);
     for (let i = currentSentence; i < end; i++) {
       if (!newBuffer[i]) {
+        console.log("Preloading TTS for sentence:", i, sentences[i]);
         newBuffer[i] = await generateTTSForIndex(
           removeMarkdown(sentences[i]),
           i
@@ -191,7 +196,11 @@ export const TTSProvider = ({
     }
     if (audioRef.current) {
       audioRef.current.src = audioUrl;
-      await audioRef.current.play();
+      await new Promise((res) => {
+        audioRef.current!.play();
+
+        audioRef.current!.onended = () => res(true);
+      });
     }
   };
 
@@ -202,10 +211,13 @@ export const TTSProvider = ({
 
   const playInOrder = async (index: number) => {
     if (index < 0 || index >= sentences.length) return;
+    console.log("Playing in order from index:", index);
     setCurrentSentence(index);
     for (let i = index; i < sentences.length; i++) {
+      console.log("Playing sentence:", i, sentences[i]);
       await playSentence(i);
       if (i < sentences.length - 1) {
+        console.log("Waiting for next sentence...");
         await new Promise((resolve) => setTimeout(resolve, 1000));
       }
     }
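Note: making playSentence wait for the onended event is what lets playInOrder above sequence sentences with a plain for/await loop. A self-contained sketch of the same wait-for-completion pattern; the onerror handling is illustrative and not part of this commit:

// Illustrative helper: resolve when playback finishes, reject if it cannot play.
function playToEnd(audio: HTMLAudioElement, src: string): Promise<void> {
  return new Promise((resolve, reject) => {
    audio.src = src;
    audio.onended = () => resolve();
    audio.onerror = () => reject(new Error("audio playback failed"));
    audio.play().catch(reject);
  });
}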
@@ -9,6 +9,15 @@ async function detectWebGPU() {
     return false;
   }
 }
+
+function blobToBase64(blob) {
+  return new Promise((resolve, _) => {
+    const reader = new FileReader();
+    reader.onloadend = () => resolve(reader.result);
+    reader.readAsDataURL(blob);
+  });
+}
+
 // Device detection
 const device = (await detectWebGPU()) ? "webgpu" : "wasm";
 self.postMessage({ status: "device", device });
@@ -20,6 +29,10 @@ const model_id = "onnx-community/Kokoro-82M-v1.0-ONNX";
 const tts = await KokoroTTS.from_pretrained(model_id, {
   dtype: device === "wasm" ? "q8" : "fp32",
   device,
+  progressCallback: (progress) => {
+    self.postMessage({ status: "progress", progress });
+    console.log(`Loading progress: ${progress * 100}%`);
+  },
 });
 
 console.log("Kokoro TTS model loaded successfully");
@@ -34,13 +47,14 @@ self.addEventListener("message", async (e) => {
 
   try {
     // Generate speech
+    console.log(`Generating speech for text: "${text}" with voice: ${voice}`);
     const audio = await tts.generate(text, { voice });
 
     // Send the audio file back to the main thread
    const blob = audio.toBlob();
     self.postMessage({
       status: "complete",
-      audio: URL.createObjectURL(blob),
+      audio: await blobToBase64(blob),
       text,
     });
   } catch (error) {
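Note on the audio payload change: an object URL from URL.createObjectURL points at an in-memory blob and does not survive a page reload, so it is not worth caching. Returning a base64 data URL via blobToBase64 (FileReader.readAsDataURL) gives the provider a string it can persist in localStorage, as the localStorage.setItem call above does, and replay later. A minimal sketch of reading such a cached entry back; the playCached name and the tts: key prefix are hypothetical, not from this commit:

// Hypothetical helper: replay a cached data-URL audio entry, if one exists.
async function playCached(sentenceIndex: number): Promise<boolean> {
  const cached = localStorage.getItem(`tts:${sentenceIndex}`);
  if (!cached) return false;
  const audio = new Audio(cached); // a data: URL works directly as an audio src
  await audio.play();
  return true;
}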