266 lines
7.2 KiB
TypeScript
266 lines
7.2 KiB
TypeScript
"use client";
|
|
import React, {
|
|
createContext,
|
|
useContext,
|
|
useState,
|
|
useEffect,
|
|
useRef,
|
|
ReactNode,
|
|
} from "react";
|
|
import removeMarkdown from "remove-markdown";
|
|
|
|
/**
 * Splits `text` into sentence-sized chunks.
 *
 * Uses `Intl.Segmenter` (sentence granularity, English rules) when the runtime
 * provides it, and a punctuation-based regular expression otherwise. Chunks keep
 * their original whitespace and terminators.
 *
 * @param text - The text to split.
 * @returns The sentences in document order; an empty array for empty input.
 */
function splitIntoSentences(text: string): string[] {
  // Both strategies agree on empty input: there is nothing to read aloud.
  if (text.length === 0) {
    return [];
  }

  if (typeof Intl !== "undefined" && Intl.Segmenter) {
    const segmenter = new Intl.Segmenter("en", { granularity: "sentence" });
    return Array.from(segmenter.segment(text)).map(
      (segment) => segment.segment
    );
  }

  // Fallback when Intl.Segmenter is unavailable: a run of non-terminators
  // followed by either its terminator(s) or the end of the text. The `$`
  // alternative keeps a trailing fragment that has no closing punctuation,
  // which would otherwise be silently dropped.
  return text.match(/[^.!?]+(?:[.!?]+|$)/g) ?? [text];
}
|
|
|
|
/** Shape of the value exposed to consumers of the text-to-speech context. */
interface TTSContextType {
  // ----- State -----

  /** Sentences of the current text, in reading order. */
  sentences: string[];
  /** Index into `sentences` of the sentence currently selected for playback. */
  currentSentence: number;
  /** Audio source per sentence index; `null` while no audio is buffered for it. */
  ttsBuffer: Array<string | null>;
  /** Voices reported by the TTS worker. */
  voices: any[];
  /** Identifier of the voice used when generating audio. */
  selectedSpeaker: string;
  /** Worker/generation status; `null` until the worker has reported in. */
  status: "ready" | "running" | null;

  // ----- State setters -----

  setSelectedSpeaker: (speaker: string) => void;
  setCurrentSentence: (index: number) => void;

  // ----- Playback controls -----

  /** Plays the sentence at `index`. */
  playSentence: (index: number) => void;
  /** Plays the sentence at `index` if it is within range; otherwise does nothing. */
  skipToSentence: (index: number) => void;
  /** Plays sentences sequentially, starting at `index`. */
  playInOrder: (index: number) => void;
  pause: () => void;
  resume: () => void;
  stop: () => void;
}
|
|
|
|
// React context carrying the TTS state and controls. The default value is
// `undefined` so that `useTTS` can detect usage outside of a `TTSProvider`.
const TTSContext = createContext<TTSContextType | undefined>(undefined);
|
|
|
|
/**
 * Provides text-to-speech state and playback controls to its subtree.
 *
 * The provider joins `pages`, splits the result into sentences, owns the TTS web
 * worker, keeps a small look-ahead buffer of generated audio, and renders a
 * hidden `<audio>` element used for playback.
 *
 * NOTE(review): the worker protocol is inferred from this file only. It is
 * assumed that every `{ type: "generate" }` request is answered by exactly one
 * message with status "complete" (carrying `audio`) or "error" — confirm
 * against the worker implementation.
 *
 * @param pages - Text of each page; pages are joined with a newline.
 * @param children - Subtree that may call `useTTS`.
 */
export const TTSProvider = ({
  pages,
  children,
}: {
  pages: string[];
  children: ReactNode;
}) => {
  // Combine pages and split into sentences. Memoized on the joined text so the
  // segmentation is not redone on every render and the array identity is stable
  // enough to be used as an effect dependency.
  const fullText = pages.join("\n");
  const sentences = React.useMemo(
    () => splitIntoSentences(fullText),
    [fullText]
  );

  const [currentSentence, setCurrentSentence] = useState(0);
  const [ttsBuffer, setTtsBuffer] = useState<(string | null)[]>(() =>
    Array<string | null>(sentences.length).fill(null)
  );
  const [selectedSpeaker, setSelectedSpeaker] = useState("af_heart");
  const [voices, setVoices] = useState<any[]>([]);
  const [status, setStatus] = useState<"ready" | "running" | null>(null);

  const audioRef = useRef<HTMLAudioElement | null>(null);
  // The worker owned by this provider; null before mount and after unmount.
  const worker = useRef<Worker | null>(null);
  // Audio requests (pending or settled) keyed by voice + text, so that the
  // preloader and the playback controls never generate the same audio twice.
  const audioRequests = useRef(new Map<string, Promise<string>>());
  // Tail of the request chain: only one request is outstanding at a time, so a
  // "complete"/"error" reply can be attributed to the request that caused it.
  const requestQueue = useRef<Promise<unknown>>(Promise.resolve());
  // Incremented whenever playback is (re)started or stopped; asynchronous
  // playback steps compare against it to detect that they were superseded.
  const playbackToken = useRef(0);
  // Resolves the promise that is currently waiting for a sentence to finish.
  const finishPlaybackWait = useRef<(() => void) | null>(null);

  /** Returns the audio source for `text` spoken by `voice`, generating it if needed. */
  const generateAudio = (text: string, voice: string): Promise<string> => {
    // The key includes voice and text: an index-only key would hand back audio
    // belonging to a different document or speaker.
    const key = `tts-${voice}-${text}`;
    const known = audioRequests.current.get(key);
    if (known) {
      return known;
    }

    const persisted = readPersistedAudio(key);
    if (persisted !== null) {
      const settled = Promise.resolve(persisted);
      audioRequests.current.set(key, settled);
      return settled;
    }

    // Bind the request to the worker that exists now, so a request created
    // before a remount can never be posted to the replacement worker.
    const target = worker.current;
    const request = requestQueue.current.then(async () => {
      if (!target) {
        throw new Error("TTS worker is not initialized");
      }
      setStatus("running");
      const audio = await requestAudioFromWorker(target, text, voice);
      persistAudio(key, audio);
      return audio;
    });
    // Keep the queue alive after a failure and forget the failed request so
    // that the same sentence can be retried later.
    requestQueue.current = request.catch(() => {
      if (audioRequests.current.get(key) === request) {
        audioRequests.current.delete(key);
      }
    });
    audioRequests.current.set(key, request);
    return request;
  };

  /** Invalidates every in-progress playback step and returns the new token. */
  const interruptPlayback = (): number => {
    playbackToken.current += 1;
    finishPlaybackWait.current?.();
    return playbackToken.current;
  };

  /** Resolves when `audio` finishes, fails, or the wait is interrupted. */
  const waitForPlaybackEnd = (audio: HTMLAudioElement): Promise<void> => {
    if (audio.ended) {
      return Promise.resolve();
    }
    return new Promise<void>((resolve) => {
      const done = () => {
        audio.removeEventListener("ended", done);
        audio.removeEventListener("error", done);
        if (finishPlaybackWait.current === done) {
          finishPlaybackWait.current = null;
        }
        resolve();
      };
      audio.addEventListener("ended", done);
      audio.addEventListener("error", done);
      finishPlaybackWait.current = done;
    });
  };

  /**
   * Selects sentence `index`, obtains its audio and starts playing it.
   * Returns the audio element once playback has started, or null when the
   * step was superseded or no audio element is mounted.
   */
  const startSentence = async (
    index: number,
    token: number
  ): Promise<HTMLAudioElement | null> => {
    setCurrentSentence(index);
    const audioUrl = await generateAudio(
      removeMarkdown(sentences[index]),
      selectedSpeaker
    );
    if (token !== playbackToken.current) {
      return null;
    }
    setTtsBuffer((prev) => withBufferedAudio(prev, index, audioUrl));
    const audio = audioRef.current;
    if (!audio) {
      return null;
    }
    audio.src = audioUrl;
    await audio.play();
    return audio;
  };

  // Create the worker on mount and dispose of it on unmount.
  useEffect(() => {
    const instance = new Worker("/workers/kokoro-worker.js", {
      type: "module",
    });
    worker.current = instance;
    const requests = audioRequests.current;

    // Mirror worker lifecycle messages into React state.
    const onMessageReceived = (e: MessageEvent) => {
      switch (e.data?.status) {
        case "ready":
          setStatus("ready");
          setVoices(e.data.voices ?? []);
          break;
        case "complete":
          setStatus("ready");
          break;
        case "error":
          console.error(
            "TTS worker reported an error:",
            e.data.error ?? e.data.data
          );
          break;
        default:
          break;
      }
    };

    const onErrorReceived = (e: ErrorEvent) => {
      console.error("Worker error:", e);
    };

    instance.addEventListener("message", onMessageReceived);
    instance.addEventListener("error", onErrorReceived);

    return () => {
      instance.removeEventListener("message", onMessageReceived);
      instance.removeEventListener("error", onErrorReceived);
      // Terminate the worker so it does not outlive the provider.
      instance.terminate();
      if (worker.current === instance) {
        worker.current = null;
      }
      // Requests bound to the terminated worker can never settle; start from a
      // clean slate so a later mount is not blocked behind them.
      requests.clear();
      requestQueue.current = Promise.resolve();
    };
  }, []);

  // Audio generated for a previous text or voice is no longer valid: clear the
  // buffer and stop any running sequence so it cannot write stale entries.
  useEffect(() => {
    interruptPlayback();
    setTtsBuffer((prev) =>
      prev.length === sentences.length && prev.every((entry) => entry === null)
        ? prev
        : Array<string | null>(sentences.length).fill(null)
    );
    // interruptPlayback only touches refs, so it is safe to omit.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sentences, selectedSpeaker]);

  // Pre-buffer current and next 2 sentences.
  useEffect(() => {
    let cancelled = false;

    const preloadBuffer = async () => {
      const start = Math.max(0, currentSentence);
      const end = Math.min(sentences.length, currentSentence + 3);
      for (let i = start; i < end; i++) {
        const audioUrl = await generateAudio(
          removeMarkdown(sentences[i]),
          selectedSpeaker
        );
        if (cancelled) {
          return;
        }
        // Functional update: never overwrite entries written by other callers
        // while this loop was waiting.
        setTtsBuffer((prev) => withBufferedAudio(prev, i, audioUrl));
      }
    };

    preloadBuffer().catch((err: unknown) => {
      if (!cancelled) {
        console.error("Failed to pre-buffer TTS audio:", err);
      }
    });

    return () => {
      cancelled = true;
    };
    // generateAudio only touches refs and state setters, so it is safe to omit.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [currentSentence, sentences, selectedSpeaker]);

  /** Plays the sentence at `index`; out-of-range indices are ignored. */
  const playSentence = async (index: number) => {
    if (!Number.isInteger(index) || index < 0 || index >= sentences.length) {
      return;
    }
    const token = interruptPlayback();
    try {
      await startSentence(index, token);
    } catch (err) {
      reportPlaybackError(err);
    }
  };

  const skipToSentence = (index: number) => {
    if (index < 0 || index >= sentences.length) return;
    void playSentence(index);
  };

  /**
   * Plays sentences sequentially from `index`, pausing one second between
   * them. The sequence ends early when playback is stopped or restarted.
   */
  const playInOrder = async (index: number) => {
    if (!Number.isInteger(index) || index < 0 || index >= sentences.length) {
      return;
    }
    const token = interruptPlayback();
    try {
      for (let i = index; i < sentences.length; i++) {
        const audio = await startSentence(i, token);
        if (!audio || token !== playbackToken.current) {
          return;
        }
        // `play()` resolves when playback starts; wait for the sentence to
        // actually finish before moving on.
        await waitForPlaybackEnd(audio);
        if (token !== playbackToken.current) {
          return;
        }
        if (i < sentences.length - 1) {
          await new Promise((resolve) => setTimeout(resolve, 1000));
          if (token !== playbackToken.current) {
            return;
          }
        }
      }
    } catch (err) {
      reportPlaybackError(err);
    }
  };

  const pause = () => {
    audioRef.current?.pause();
  };

  const resume = () => {
    audioRef.current?.play().catch(reportPlaybackError);
  };

  const stop = () => {
    // Also ends a running playInOrder sequence.
    interruptPlayback();
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
    }
  };

  const value: TTSContextType = {
    sentences,
    currentSentence,
    ttsBuffer,
    voices,
    playSentence,
    skipToSentence,
    selectedSpeaker,
    setSelectedSpeaker,
    setCurrentSentence,
    playInOrder,
    pause,
    resume,
    stop,
    status,
  };

  return (
    <TTSContext.Provider value={value}>
      {children}
      {/* Hidden audio element used for playback */}
      <audio ref={audioRef} style={{ display: "none" }} />
    </TTSContext.Provider>
  );
};

/** Sends one "generate" request to `target` and resolves with the audio it returns. */
function requestAudioFromWorker(
  target: Worker,
  text: string,
  voice: string
): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const detach = () => {
      target.removeEventListener("message", onMessage);
      target.removeEventListener("error", onError);
    };
    const onMessage = (e: MessageEvent) => {
      const data = e.data;
      if (data?.status === "complete") {
        detach();
        resolve(data.audio);
      } else if (data?.status === "error") {
        detach();
        const reason = data.error ?? data.data;
        reject(
          reason instanceof Error
            ? reason
            : new Error(String(reason ?? "TTS generation failed"))
        );
      }
      // Any other status (e.g. "device", "ready") is unrelated to this
      // request: keep listening instead of giving up on the first message.
    };
    const onError = (e: ErrorEvent) => {
      detach();
      reject(new Error(e.message || "TTS worker error"));
    };
    target.addEventListener("message", onMessage);
    target.addEventListener("error", onError);
    target.postMessage({ type: "generate", text, voice });
  });
}

/** Reads reusable audio for `key` from localStorage; null when absent or unusable. */
function readPersistedAudio(key: string): string | null {
  try {
    const stored = localStorage.getItem(key);
    // blob: URLs do not survive a page reload, so a persisted one is dead.
    return stored && !stored.startsWith("blob:") ? stored : null;
  } catch {
    // Storage may be unavailable (e.g. disabled by the browser).
    return null;
  }
}

/** Best-effort persistence of generated audio; failures only cost a regeneration. */
function persistAudio(key: string, audio: string): void {
  if (typeof audio !== "string" || audio.startsWith("blob:")) {
    return;
  }
  try {
    localStorage.setItem(key, audio);
  } catch (err) {
    console.warn("Could not persist TTS audio:", err);
  }
}

/** Returns a copy of `buffer` with `audio` stored at `index`, or `buffer` itself if nothing changes. */
function withBufferedAudio(
  buffer: (string | null)[],
  index: number,
  audio: string
): (string | null)[] {
  if (index < 0 || index >= buffer.length || buffer[index] === audio) {
    return buffer;
  }
  const next = [...buffer];
  next[index] = audio;
  return next;
}

/** Logs a playback failure, ignoring the AbortError raised when playback is superseded. */
function reportPlaybackError(err: unknown): void {
  if (err instanceof DOMException && err.name === "AbortError") {
    return;
  }
  console.error("TTS playback failed:", err);
}
|
|
|
|
/**
 * Hook giving access to the text-to-speech state and controls.
 *
 * @returns The value supplied by the nearest `TTSContext` provider.
 * @throws Error when no context value is available, i.e. the hook is used
 *   outside of a `TTSProvider`.
 */
export const useTTS = (): TTSContextType => {
  const tts = useContext(TTSContext);
  if (tts) {
    return tts;
  }
  throw new Error("useTTS must be used within a TTSProvider");
};
|