// neuroread/components/TTSProvider.tsx
// 2025-04-24 17:57:19 -04:00
//
// 278 lines
// 7.7 KiB
// TypeScript

"use client";
import React, {
  createContext,
  useContext,
  useEffect,
  useMemo,
  useRef,
  useState,
  ReactNode,
} from "react";
import removeMarkdown from "remove-markdown";
import { toast } from "sonner";
/**
 * Splits `text` into sentence-sized segments without discarding any characters.
 *
 * When `Intl.Segmenter` is available it is used with the "en" locale and
 * sentence granularity; segments are returned exactly as the segmenter emits
 * them (including trailing whitespace), and an empty string yields `[]`.
 *
 * Otherwise a regex fallback is used. Each segment is either a run of text
 * followed by one or more terminators (`.`, `!`, `?`), or the unterminated
 * remainder at the end of the input, so trailing text such as a heading or a
 * final sentence without a period is kept rather than silently dropped.
 *
 * @param text - The text to segment.
 * @returns The segments in document order; `[text]` when the fallback finds
 *   nothing to match (e.g. an empty string).
 */
function splitIntoSentences(text: string): string[] {
  if (typeof Intl !== "undefined" && Intl.Segmenter) {
    const segmenter = new Intl.Segmenter("en", { granularity: "sentence" });
    return Array.from(segmenter.segment(text), (part) => part.segment);
  }
  // Fallback for runtimes without Intl.Segmenter. The second alternative
  // captures whatever follows the last terminator so no text is lost.
  return text.match(/[^.!?]*[.!?]+|[^.!?]+$/g) ?? [text];
}
/**
 * Shape of the value published by `TTSProvider` and returned by `useTTS`.
 */
interface TTSContextType {
  // ---- Read-only state ----------------------------------------------------

  /** Non-empty sentences obtained by splitting the joined pages. */
  sentences: string[];
  /** Audio source per sentence index; `null` until audio has been loaded. */
  ttsBuffer: (string | null)[];
  /** Index into `sentences` of the sentence currently in focus. */
  currentSentence: number;
  /** Voice list reported by the worker in its "ready" message. */
  voices: any[];
  /** Voice identifier sent along with each generation request. */
  selectedSpeaker: string;
  /** `null` until the worker reports ready; "running" while generating. */
  status: "ready" | "running" | null;

  // ---- State setters ------------------------------------------------------

  /** Moves the focus to `index` without starting playback. */
  setCurrentSentence: (index: number) => void;
  /** Changes the voice used for generation. */
  setSelectedSpeaker: (speaker: string) => void;

  // ---- Playback -----------------------------------------------------------

  /** Loads (if needed) and plays the sentence at `index`. */
  playSentence: (index: number) => void;
  /** Like `playSentence`, but ignores indices outside `sentences`. */
  skipToSentence: (index: number) => void;
  /** Plays every sentence from `index` to the end, one after another. */
  playInOrder: (index: number) => void;
  /** Pauses the audio element. */
  pause: () => void;
  /** Resumes the audio element. */
  resume: () => void;
  /** Pauses the audio element and rewinds it to the start. */
  stop: () => void;
}
// React context carrying the TTS state and controls. The default is
// `undefined` so that `useTTS` can detect usage outside of a `TTSProvider`.
const TTSContext = createContext<TTSContextType | undefined>(undefined);
/** Pause inserted between consecutive sentences by `playInOrder`, in milliseconds. */
const SENTENCE_GAP_MS = 1000;

/** Callbacks that settle the generation request currently owned by the worker. */
interface PendingTTSRequest {
  resolve: (audio: string) => void;
  reject: (reason: Error) => void;
}

/**
 * Fields this provider reads from worker messages. Everything is optional
 * because the worker script is not part of this file.
 */
interface TTSWorkerMessage {
  status?: string;
  voices?: any[];
  audio?: unknown;
  error?: unknown;
  data?: unknown;
}

/** Renders an arbitrary failure value as text for logs and toasts. */
function describeTTSError(reason: unknown): string {
  return reason instanceof Error ? reason.message : String(reason);
}

/** Short non-cryptographic (32-bit FNV-1a) hash used to keep cache keys small. */
function hashTTSText(text: string): string {
  let hash = 0x811c9dc5;
  for (let i = 0; i < text.length; i++) {
    hash ^= text.charCodeAt(i);
    hash = Math.imul(hash, 0x01000193);
  }
  return (hash >>> 0).toString(36);
}

/**
 * Cache key for one piece of generated audio. It depends on the voice and the
 * spoken text (not on the sentence position), so switching documents or
 * voices can never return audio that belongs to something else.
 */
function ttsCacheKey(voice: string, text: string): string {
  return `tts-${voice}-${text.length}-${hashTTSText(text)}`;
}

/** Reads persisted audio; returns `null` when missing, unusable or storage is unavailable. */
function readCachedAudio(key: string): string | null {
  try {
    const cached = localStorage.getItem(key);
    // blob: URLs do not outlive the context that created them, so a stored
    // one can only be stale.
    return cached && !cached.startsWith("blob:") ? cached : null;
  } catch {
    return null;
  }
}

/** Persists audio on a best-effort basis; quota and availability errors are ignored. */
function writeCachedAudio(key: string, audio: string): void {
  if (audio.startsWith("blob:")) return;
  try {
    localStorage.setItem(key, audio);
  } catch {
    // Storage is only an optimisation; playback works without it.
  }
}

/** Creates a buffer with one empty slot per sentence. */
function createEmptyBuffer(length: number): (string | null)[] {
  return new Array<string | null>(length).fill(null);
}

/**
 * Provides text-to-speech state and playback controls for `pages`.
 *
 * The pages are joined, split into sentences, and audio for each sentence is
 * requested from the worker at `/workers/kokoro-worker.js` using
 * `{ type: "generate", text, voice }` messages. The worker is expected to
 * answer with `status: "ready"` (carrying `voices`), `status: "complete"`
 * (carrying `audio`) or `status: "error"`.
 *
 * Behavioural guarantees:
 * - Requests are sent to the worker one at a time, so a reply is always
 *   matched with the request that caused it; unrelated worker messages never
 *   consume a pending request.
 * - Audio is cached per voice and text, both in memory and (when it is not a
 *   `blob:` URL) in `localStorage`.
 * - The buffer is cleared when the text or the selected voice changes, and
 *   the current sentence plus the next two are preloaded.
 * - `stop`, a new `playSentence`/`playInOrder` call, a text change or an
 *   unmount cancel any sequence that is still running.
 * - The worker is terminated when the provider unmounts.
 *
 * @param pages - Page texts; they are joined with newlines before splitting.
 * @param children - Subtree that may call `useTTS`.
 */
export const TTSProvider = ({
  pages,
  children,
}: {
  pages: string[];
  children: ReactNode;
}) => {
  // Combine pages and split into sentences. Keyed on the joined text so the
  // segmentation only reruns when the content actually changes.
  const fullText = pages.join("\n");
  const sentences = useMemo(
    () =>
      splitIntoSentences(fullText).filter((sentence) => {
        const trimmed = sentence.trim();
        // NOTE(review): "\\n" is a literal backslash followed by "n",
        // presumably for pages that carry escaped newlines; confirm.
        return trimmed !== "\\n" && trimmed !== "";
      }),
    [fullText]
  );

  const [currentSentence, setCurrentSentence] = useState(0);
  const [ttsBuffer, setTtsBuffer] = useState<(string | null)[]>(() =>
    createEmptyBuffer(sentences.length)
  );
  const [selectedSpeaker, setSelectedSpeaker] = useState("af_heart");
  const [voices, setVoices] = useState<any[]>([]);
  const [status, setStatus] = useState<"ready" | "running" | null>(null);

  const audioRef = useRef<HTMLAudioElement>(null);
  // Worker that performs the speech synthesis.
  const worker = useRef<Worker | null>(null);
  // Request the worker is currently processing, if any.
  const pendingRequest = useRef<PendingTTSRequest | null>(null);
  // Tail of the request chain; guarantees one outstanding request at a time.
  const requestQueue = useRef<Promise<void>>(Promise.resolve());
  // In-memory audio cache, also used to share in-flight requests.
  const audioRequests = useRef(new Map<string, Promise<string>>());
  // Latest text and voice, readable from long-running async loops.
  const latest = useRef({ sentences, selectedSpeaker });
  // Incremented to invalidate whichever playback sequence is running.
  const playbackRun = useRef(0);
  // Resolves the promise of the sentence that is currently playing.
  const releasePlayback = useRef<(() => void) | null>(null);

  /** Invalidates the running sequence and wakes up whoever awaits the audio. */
  const cancelPlayback = () => {
    playbackRun.current += 1;
    const release = releasePlayback.current;
    releasePlayback.current = null;
    release?.();
  };

  const pause = () => {
    audioRef.current?.pause();
  };

  const resume = () => {
    // play() rejects when there is nothing to play or playback is blocked;
    // resuming is then simply a no-op.
    audioRef.current?.play().catch(() => undefined);
  };

  const stop = () => {
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
    }
    // Without this a running playInOrder would wait for "ended" forever.
    cancelPlayback();
  };

  /** Queues a generation request and resolves with the worker's audio. */
  const requestFromWorker = (text: string, voice: string): Promise<string> => {
    const start = () =>
      new Promise<string>((resolve, reject) => {
        const activeWorker = worker.current;
        if (!activeWorker) {
          reject(new Error("TTS worker is not available"));
          return;
        }
        pendingRequest.current = { resolve, reject };
        setStatus("running");
        activeWorker.postMessage({ type: "generate", text, voice });
      });
    const result = requestQueue.current.then(start);
    // The queue itself never rejects, so one failure cannot block later requests.
    requestQueue.current = result.then(
      () => undefined,
      () => undefined
    );
    return result;
  };

  /** Returns audio for `sentence` spoken by `voice`, generating it at most once. */
  const loadSentenceAudio = (
    sentence: string,
    voice: string
  ): Promise<string> => {
    const spoken = removeMarkdown(sentence);
    const key = ttsCacheKey(voice, spoken);
    const known = audioRequests.current.get(key);
    if (known) return known;

    const cached = readCachedAudio(key);
    const request =
      cached !== null
        ? Promise.resolve(cached)
        : requestFromWorker(spoken, voice).then((audio) => {
            writeCachedAudio(key, audio);
            return audio;
          });
    audioRequests.current.set(key, request);
    // Forget failed requests so a later attempt can retry.
    request.catch(() => {
      if (audioRequests.current.get(key) === request) {
        audioRequests.current.delete(key);
      }
    });
    return request;
  };

  /** Records audio in the buffer unless the text or voice changed meanwhile. */
  const storeInBuffer = (
    index: number,
    audio: string,
    forSentences: string[],
    forVoice: string
  ) => {
    if (
      latest.current.sentences !== forSentences ||
      latest.current.selectedSpeaker !== forVoice
    ) {
      return;
    }
    setTtsBuffer((prev) => {
      if (prev[index] === audio) return prev;
      const updated = [...prev];
      updated[index] = audio;
      return updated;
    });
  };

  /** Plays `audioUrl` and resolves when it ends, fails, or is cancelled. */
  const playAudio = (audioUrl: string): Promise<void> =>
    new Promise<void>((resolve) => {
      const audio = audioRef.current;
      if (!audio) {
        resolve();
        return;
      }
      const finish = () => {
        audio.onended = null;
        audio.onerror = null;
        if (releasePlayback.current === finish) releasePlayback.current = null;
        resolve();
      };
      // A new source replaces the old one, so release its waiter first.
      releasePlayback.current?.();
      releasePlayback.current = finish;
      audio.onended = finish;
      audio.onerror = finish;
      audio.src = audioUrl;
      audio.play().catch((reason: unknown) => {
        // pause() or a newer source interrupts a pending play() with an
        // AbortError; those paths manage the waiter themselves.
        if (reason instanceof DOMException && reason.name === "AbortError") {
          return;
        }
        if (releasePlayback.current !== finish) return;
        toast.error(`Unable to play audio: ${describeTTSError(reason)}`);
        stop();
      });
    });

  /** Loads and plays one sentence as part of playback sequence `run`. */
  const playOne = async (index: number, run: number): Promise<void> => {
    const { sentences: activeSentences, selectedSpeaker: voice } =
      latest.current;
    if (index < 0 || index >= activeSentences.length) return;
    setCurrentSentence(index);
    const audioUrl = await loadSentenceAudio(activeSentences[index], voice);
    storeInBuffer(index, audioUrl, activeSentences, voice);
    // The sequence may have been cancelled while the audio was generated.
    if (playbackRun.current !== run) return;
    await playAudio(audioUrl);
  };

  // Set up the worker on mount and tear it down on unmount.
  useEffect(() => {
    const activeWorker = new Worker("/workers/kokoro-worker.js", {
      type: "module",
    });
    worker.current = activeWorker;
    let modelReady = false;

    const takePending = (): PendingTTSRequest | null => {
      const pending = pendingRequest.current;
      pendingRequest.current = null;
      return pending;
    };

    const onMessageReceived = (e: MessageEvent<TTSWorkerMessage>) => {
      switch (e.data.status) {
        case "ready":
          modelReady = true;
          // A request may already have been posted while the model loaded.
          setStatus(pendingRequest.current ? "running" : "ready");
          setVoices(e.data.voices ?? []);
          break;
        case "complete": {
          const pending = takePending();
          setStatus("ready");
          if (typeof e.data.audio === "string") {
            pending?.resolve(e.data.audio);
          } else {
            pending?.reject(new Error("TTS worker returned no audio"));
          }
          break;
        }
        case "error": {
          const detail = e.data.error ?? e.data.data;
          const pending = takePending();
          if (pending) {
            setStatus(modelReady ? "ready" : null);
            toast.error(`Error generating audio: ${detail}`);
            pending.reject(new Error(describeTTSError(detail)));
          } else {
            toast.error(`TTS worker error: ${describeTTSError(detail)}`);
          }
          break;
        }
        default:
          // Other status messages (e.g. load progress) are not used here.
          break;
      }
    };

    const onErrorReceived = (e: ErrorEvent) => {
      console.error("Worker error:", e);
      toast.error(`TTS worker error: ${e.message}`);
      const pending = takePending();
      if (pending) {
        setStatus(modelReady ? "ready" : null);
        pending.reject(new Error(e.message || "TTS worker error"));
      }
    };

    activeWorker.addEventListener("message", onMessageReceived);
    activeWorker.addEventListener("error", onErrorReceived);

    return () => {
      activeWorker.removeEventListener("message", onMessageReceived);
      activeWorker.removeEventListener("error", onErrorReceived);
      activeWorker.terminate();
      if (worker.current === activeWorker) worker.current = null;
      // Audio produced by this worker may not be usable once it is gone.
      audioRequests.current.clear();
      takePending()?.reject(new Error("TTS worker was shut down"));
      cancelPlayback();
    };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  // Drop buffered audio whenever the text or the voice changes.
  useEffect(() => {
    const previous = latest.current;
    if (
      previous.sentences === sentences &&
      previous.selectedSpeaker === selectedSpeaker
    ) {
      return;
    }
    latest.current = { sentences, selectedSpeaker };
    setTtsBuffer(createEmptyBuffer(sentences.length));
    if (previous.sentences !== sentences) {
      // Sentence indices refer to different text now.
      stop();
      setCurrentSentence((index) => (index < sentences.length ? index : 0));
    }
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [sentences, selectedSpeaker]);

  // Pre-buffer current and next 2 sentences.
  useEffect(() => {
    let cancelled = false;
    const preloadBuffer = async () => {
      const end = Math.min(sentences.length, currentSentence + 3);
      for (let i = Math.max(0, currentSentence); i < end; i++) {
        if (cancelled) return;
        try {
          const audio = await loadSentenceAudio(sentences[i], selectedSpeaker);
          storeInBuffer(i, audio, sentences, selectedSpeaker);
        } catch {
          // Preloading is best effort and worker failures are reported where
          // they are received; stop here instead of repeating the failure.
          return;
        }
      }
    };
    void preloadBuffer();
    return () => {
      cancelled = true;
    };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, [currentSentence, sentences, selectedSpeaker]);

  const playSentence = async (index: number) => {
    cancelPlayback();
    const run = playbackRun.current;
    try {
      await playOne(index, run);
    } catch (reason) {
      console.error("Unable to play sentence:", index, reason);
    }
  };

  const skipToSentence = (index: number) => {
    if (index < 0 || index >= sentences.length) return;
    void playSentence(index);
  };

  const playInOrder = async (index: number) => {
    if (index < 0 || index >= latest.current.sentences.length) return;
    cancelPlayback();
    const run = playbackRun.current;
    try {
      for (let i = index; i < latest.current.sentences.length; i++) {
        await playOne(i, run);
        if (playbackRun.current !== run) return;
        if (i < latest.current.sentences.length - 1) {
          await new Promise((resolve) => setTimeout(resolve, SENTENCE_GAP_MS));
          if (playbackRun.current !== run) return;
        }
      }
    } catch (reason) {
      console.error("Playback sequence aborted:", reason);
    }
  };

  const value: TTSContextType = {
    sentences,
    currentSentence,
    ttsBuffer,
    voices,
    playSentence,
    skipToSentence,
    selectedSpeaker,
    setSelectedSpeaker,
    setCurrentSentence,
    playInOrder,
    pause,
    resume,
    stop,
    status,
  };

  return (
    <TTSContext.Provider value={value}>
      {children}
      {/* Hidden audio element used for playback */}
      <audio ref={audioRef} style={{ display: "none" }} />
    </TTSContext.Provider>
  );
};
/**
 * Returns the TTS state and controls published by the nearest `TTSProvider`.
 *
 * @throws Error when no `TTSProvider` is present above the calling component.
 */
export const useTTS = (): TTSContextType => {
  const tts = useContext(TTSContext);
  if (tts) {
    return tts;
  }
  throw new Error("useTTS must be used within a TTSProvider");
};