// neuroread/components/TTSProvider.tsx

"use client";
import { createClient } from "@/utils/supabase/client";
import React, {
createContext,
useContext,
useState,
useEffect,
useRef,
ReactNode,
} from "react";
import removeMarkdown from "remove-markdown";
import { toast } from "sonner";
import { synthesizeTTSAction } from "@/app/actions";
// Split text into sentences, preferring Intl.Segmenter for locale-aware accuracy.
function splitIntoSentences(text: string): string[] {
  if (typeof Intl !== "undefined" && Intl.Segmenter) {
    const segmenter = new Intl.Segmenter("en", { granularity: "sentence" });
    return Array.from(segmenter.segment(text), (segment) => segment.segment);
  }
  // Regex fallback when Intl.Segmenter is unavailable. The second alternative
  // keeps trailing text that has no terminal punctuation — the previous
  // pattern ("[^.!?]+[.!?]+" only) silently dropped it.
  return text.match(/[^.!?]+[.!?]+|[^.!?]+$/g) || [text];
}
// Public contract exposed by TTSProvider through React context.
interface TTSContextType {
  // Sentence-split, filtered text of all pages.
  sentences: string[];
  // Index into `sentences` of the sentence currently selected/being played.
  currentSentence: number;
  // Available voices; never populated in this file — presumably filled by a
  // future fetch. TODO(review): replace `any[]` with a concrete voice type.
  voices: any[];
  // Voice id passed to the TTS backend (e.g. "af_heart").
  selectedSpeaker: string;
  // Playback state machine value; null is part of the declared type but this
  // file only ever sets "ready" | "running" | "paused".
  status: "ready" | "running" | "paused" | null;
  setSelectedSpeaker: (speaker: string) => void;
  setCurrentSentence: (index: number) => void;
  // Play a single sentence by index.
  playSentence: (index: number) => void;
  // Jump playback to the given sentence (bounds-checked, out-of-range is a no-op).
  skipToSentence: (index: number) => void;
  // Play sequentially from the given index until the end or stop().
  playInOrder: (index: number) => void;
  pause: () => void;
  resume: () => void;
  stop: () => void;
}
// Undefined outside a <TTSProvider>; useTTS() turns that into a loud error.
const TTSContext = createContext<TTSContextType | undefined>(undefined);
/**
 * Provides sentence-level TTS playback over the given pages: splits them into
 * sentences, synthesizes audio via a server action (with look-ahead
 * preloading), and exposes playback controls through context.
 */
export const TTSProvider = ({
  pages,
  children,
}: {
  pages: string[];
  children: ReactNode;
}) => {
  const supabase = createClient();
  // NOTE(review): recomputed every render; wrap in useMemo if pages are large.
  const fullText = pages.join("\n");
  // Drop empty fragments left over from page joins / blank lines.
  const sentences = splitIntoSentences(fullText).filter(
    (sentence) => sentence.trim() !== ""
  );
  const [currentSentence, setCurrentSentence] = useState(0);
  const audioRef = useRef<HTMLAudioElement>(null);
  const [selectedSpeaker, setSelectedSpeaker] = useState("af_heart");
  const [voices, setVoices] = useState<any[]>([]);
  const [status, setStatus] = useState<"ready" | "running" | "paused" | null>(
    "ready"
  );
  // Promise cache keyed by sentence index. Caching the *promise* (not the
  // finished URL) deduplicates concurrent synthesis of the same sentence —
  // previously preload and on-demand playback could both generate it, because
  // the in-flight set was read from a stale state closure.
  const audioCache = useRef<Map<number, Promise<string>>>(new Map());
  // Flag shared between playInOrder and stop so stop() can break the
  // sequential loop. Must be a ref: the previous version assigned a variable
  // local to playInOrder from stop(), which was out of scope.
  const shouldContinueRef = useRef(true);

  // Return (and lazily start) synthesis for one sentence; dedupes in-flight work.
  const getAudio = (index: number): Promise<string> => {
    let pending = audioCache.current.get(index);
    if (!pending) {
      // Strip markdown before synthesis so preloaded and on-demand audio
      // match (previously preload sent raw markdown, playback sent stripped).
      pending = generateTTS(removeMarkdown(sentences[index]), index);
      // Evict failed entries so a later call can retry.
      pending.catch(() => audioCache.current.delete(index));
      audioCache.current.set(index, pending);
    }
    return pending;
  };

  // Preload audio for a range of upcoming sentences (best-effort).
  const preloadAudio = async (startIndex: number, range: number = 3) => {
    const end = Math.min(sentences.length, startIndex + range);
    for (let i = startIndex; i < end; i++) {
      try {
        await getAudio(i);
      } catch (error) {
        // Preloading is best-effort; playback surfaces real failures.
        console.error(`Error preloading audio for sentence ${i}:`, error);
      }
    }
  };

  /**
   * Synthesize one sentence via the server action and return an object URL.
   * Throws (after toasting) when the action rejects or returns no blob.
   */
  async function generateTTS(sentence: string, index: number): Promise<string> {
    try {
      const blob = await synthesizeTTSAction({
        text: sentence,
        previous_text: sentences[index - 1] || "",
        next_text: sentences[index + 1] || "",
        voice: selectedSpeaker,
        index,
      });
      if (!blob) {
        throw new Error("Failed to generate TTS");
      }
      // NOTE(review): object URLs are never revoked; call
      // URL.revokeObjectURL on unmount if memory becomes a concern.
      return URL.createObjectURL(blob);
    } catch (error) {
      console.error("Error generating TTS:", error);
      toast.error("Failed to generate TTS. Please try again.");
      throw error;
    }
  }

  /**
   * Play a single sentence. Resolves true when playback finished, false on
   * any error — returning instead of rethrowing lets playInOrder stop the
   * sequence without fire-and-forget callers hitting unhandled rejections.
   */
  const playSentence = async (index: number): Promise<boolean> => {
    setCurrentSentence(index);
    try {
      const audioUrl = await getAudio(index);
      if (audioRef.current) {
        const audio = audioRef.current;
        audio.src = audioUrl;
        await new Promise<void>((resolve, reject) => {
          // Attach handlers before play() so a failing or instantly-ending
          // track cannot race past us and leave this promise pending forever.
          audio.onended = () => resolve();
          audio.onerror = () =>
            reject(new Error(`Playback failed for sentence ${index}`));
          audio.play().catch(reject);
        });
      }
      return true;
    } catch (error) {
      console.error("Error playing sentence:", error);
      return false;
    }
  };

  // Jump to a sentence and play just it; out-of-range indices are a no-op.
  const skipToSentence = (index: number) => {
    if (index < 0 || index >= sentences.length) return;
    void playSentence(index);
  };

  // Play sentences sequentially from `index` until the end, stop(), or error.
  const playInOrder = async (index: number) => {
    if (index < 0 || index >= sentences.length) return;
    setCurrentSentence(index);
    shouldContinueRef.current = true;
    setStatus("running");
    for (let i = index; i < sentences.length; i++) {
      if (!shouldContinueRef.current) {
        console.log("Playback stopped or paused.");
        break;
      }
      // Kick off prefetch of upcoming sentences while this one plays.
      void preloadAudio(i + 1);
      const ok = await playSentence(i);
      if (!ok) break; // Stop playback on error.
    }
    // Reset the playback state when done.
    setStatus("ready");
  };

  // Pause leaves playSentence's promise pending; resume() lets it finish.
  const pause = () => {
    if (audioRef.current) {
      audioRef.current.pause();
      setStatus("paused");
    }
  };

  const resume = () => {
    if (audioRef.current && status === "paused") {
      audioRef.current.play();
      setStatus("running");
    }
  };

  const stop = () => {
    // Break the playInOrder loop before silencing the current track.
    shouldContinueRef.current = false;
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.currentTime = 0;
      setStatus("ready");
    }
  };

  // Keep a few sentences ahead of the cursor synthesized.
  useEffect(() => {
    void preloadAudio(currentSentence);
  }, [currentSentence]);

  const value: TTSContextType = {
    sentences,
    currentSentence,
    voices,
    playSentence,
    skipToSentence,
    selectedSpeaker,
    setSelectedSpeaker,
    setCurrentSentence,
    playInOrder,
    pause,
    resume,
    stop,
    status,
  };
  return (
    <TTSContext.Provider value={value}>
      {children}
      <audio ref={audioRef} style={{ display: "none" }} />
    </TTSContext.Provider>
  );
};
export const useTTS = (): TTSContextType => {
const context = useContext(TTSContext);
if (!context) {
throw new Error("useTTS must be used within a TTSProvider");
}
return context;
};