232 lines
6.4 KiB
TypeScript
232 lines
6.4 KiB
TypeScript
"use client";
|
|
import { createClient } from "@/utils/supabase/client";
import React, {
  createContext,
  useContext,
  useState,
  useEffect,
  useMemo,
  useRef,
  ReactNode,
} from "react";
import removeMarkdown from "remove-markdown";
import { toast } from "sonner";
import { synthesizeTTSAction } from "@/app/actions";
|
|
|
|
/**
 * Splits `text` into sentence-sized segments.
 *
 * Uses `Intl.Segmenter` (English, sentence granularity) when the runtime
 * provides it. Otherwise falls back to a regular expression that cuts after
 * each run of `.`, `!` or `?`.
 *
 * Both paths are lossless for non-empty input: segments keep their original
 * whitespace, and text after the last terminator is returned as a final
 * segment instead of being dropped.
 *
 * @param text - The text to segment.
 * @returns The segments in document order. In the fallback path an empty
 *   input yields `[text]`.
 */
function splitIntoSentences(text: string): string[] {
  if (typeof Intl !== "undefined" && Intl.Segmenter) {
    const segmenter = new Intl.Segmenter("en", { granularity: "sentence" });
    return Array.from(segmenter.segment(text)).map((part) => part.segment);
  }

  // Fallback when Intl.Segmenter is unavailable.
  // First alternative: optional text followed by a run of terminators.
  // Second alternative: trailing text that has no terminator at all, which a
  // terminator-only pattern would silently discard.
  return text.match(/[^.!?]*[.!?]+|[^.!?]+$/g) ?? [text];
}
|
|
|
|
/** Shape of the value exposed through the TTS context. */
interface TTSContextType {
  // ---- State ----

  /** Sentences obtained by splitting the joined pages, blank entries removed. */
  sentences: string[];
  /** Index into `sentences` of the sentence currently selected. */
  currentSentence: number;
  /** Playback status as tracked by the provider. */
  status: "ready" | "running" | "paused" | null;
  /** Voice identifier sent to the synthesis action. */
  selectedSpeaker: string;
  /**
   * List of voices.
   * NOTE(review): nothing in this file populates it; confirm whether it is
   * still needed.
   */
  voices: any[];

  // ---- Setters ----

  setCurrentSentence: (index: number) => void;
  setSelectedSpeaker: (speaker: string) => void;

  // ---- Playback controls ----

  /** Selects the sentence at `index` and plays its audio. */
  playSentence: (index: number) => void;
  /** Plays the sentence at `index`; out-of-range indices are ignored. */
  skipToSentence: (index: number) => void;
  /** Plays sentences one after another starting at `index`. */
  playInOrder: (index: number) => void;
  /** Pauses the audio element. */
  pause: () => void;
  /** Resumes the audio element when the status is "paused". */
  resume: () => void;
  /** Pauses the audio element and rewinds it to the start. */
  stop: () => void;
}
|
|
|
|
// Context carrying the TTS state and controls. The default value is
// `undefined` so that useTTS can detect a consumer rendered outside of a
// TTSProvider.
const TTSContext = createContext<TTSContextType | undefined>(undefined);
|
|
|
|
/**
 * Provides text-to-speech playback for `pages` to its descendants.
 *
 * The pages are joined, split into sentences and synthesized one sentence at a
 * time through `synthesizeTTSAction`. Audio is played on a single hidden
 * `<audio>` element rendered by the provider.
 *
 * Design notes:
 * - Every playback request (single sentence or sequence) owns a numeric token.
 *   Starting a new request or calling `stop` bumps the token, so an older
 *   sequence notices it has been superseded and exits instead of continuing.
 * - Synthesized audio is cached as object URLs keyed by voice, index and
 *   sentence text, so changing the speaker or the pages never replays stale
 *   audio. Concurrent requests for the same key share one in-flight promise.
 * - All object URLs are revoked when the provider unmounts.
 *
 * @param pages - Page texts; joined with newlines before sentence splitting.
 * @param children - Subtree that can call `useTTS`.
 */
export const TTSProvider = ({
  pages,
  children,
}: {
  pages: string[];
  children: ReactNode;
}) => {
  // NOTE(review): this client is not used anywhere in the provider. The call
  // is kept in case createClient() has side effects — confirm and remove.
  const supabase = createClient();

  // Segmenting the full text is comparatively expensive; redo it only when the
  // `pages` array itself changes rather than on every render.
  const sentences = useMemo(
    () =>
      splitIntoSentences(pages.join("\n")).filter(
        (sentence) => sentence.trim() !== "\\n" && sentence.trim() !== ""
      ),
    [pages]
  );

  const [currentSentence, setCurrentSentence] = useState(0);
  const [selectedSpeaker, setSelectedSpeaker] = useState("af_heart");
  // Never populated by this provider; exposed because TTSContextType requires it.
  const [voices] = useState<any[]>([]);
  const [status, setStatus] = useState<"ready" | "running" | "paused" | null>(
    "ready"
  );

  const audioRef = useRef<HTMLAudioElement>(null);
  // Object URLs of synthesized audio, keyed by voice + index + sentence text.
  const audioCache = useRef<Map<string, string>>(new Map());
  // Synthesis requests that have been started but not yet settled.
  const pendingAudio = useRef<Map<string, Promise<string>>>(new Map());
  // Identifies the playback request that currently owns the audio element.
  const playbackToken = useRef(0);
  // Resolves the promise of whoever is currently waiting for audio to finish.
  const releaseWaiter = useRef<(() => void) | null>(null);
  const isMounted = useRef(true);

  /**
   * Cancels whatever playback request is active and returns the token that a
   * new request should use.
   */
  const invalidatePlayback = (): number => {
    playbackToken.current += 1;
    // Unblock the superseded request so it can observe the new token and exit.
    releaseWaiter.current?.();
    return playbackToken.current;
  };

  /**
   * Requests synthesized audio for one sentence and wraps it in an object URL.
   * Shows a toast and rethrows when synthesis fails.
   */
  async function generateTTS(sentence: string, index: number): Promise<string> {
    try {
      const blob = await synthesizeTTSAction({
        text: sentence,
        previous_text: sentences[index - 1] ?? "",
        next_text: sentences[index + 1] ?? "",
        voice: selectedSpeaker,
        index,
      });
      if (!blob) {
        throw new Error("Failed to generate TTS");
      }
      return URL.createObjectURL(blob);
    } catch (error) {
      console.error("Error generating TTS:", error);
      toast.error("Failed to generate TTS. Please try again.");
      throw error;
    }
  }

  /**
   * Returns the audio URL for sentence `index`, synthesizing it if needed.
   * Markdown is always stripped before synthesis so preloaded and on-demand
   * audio are generated from the same text. Callers must pass a valid index.
   */
  const getAudioUrl = (index: number): Promise<string> => {
    const text = removeMarkdown(sentences[index]);
    const key = JSON.stringify([selectedSpeaker, index, text]);

    const cached = audioCache.current.get(key);
    if (cached !== undefined) return Promise.resolve(cached);

    const pending = pendingAudio.current.get(key);
    if (pending !== undefined) return pending;

    const request = (async () => {
      try {
        const url = await generateTTS(text, index);
        if (isMounted.current) {
          audioCache.current.set(key, url);
        } else {
          // Nobody will play or clean up this URL after unmount.
          URL.revokeObjectURL(url);
        }
        return url;
      } finally {
        pendingAudio.current.delete(key);
      }
    })();
    pendingAudio.current.set(key, request);
    return request;
  };

  /**
   * Warms the cache for up to `range` sentences starting at `startIndex`.
   * Failures are logged and do not abort the remaining sentences.
   */
  const preloadAudio = async (
    startIndex: number,
    range: number = 3
  ): Promise<void> => {
    const end = Math.min(sentences.length, startIndex + range);
    for (let i = Math.max(0, startIndex); i < end; i++) {
      try {
        await getAudioUrl(i);
      } catch (error) {
        console.error(`Error preloading audio for sentence ${i}:`, error);
      }
    }
  };

  /**
   * Plays sentence `index` on behalf of the request identified by `token`.
   * Resolves when the audio ends or the request is superseded; rejects when
   * synthesis or playback fails.
   */
  const playOne = async (index: number, token: number): Promise<void> => {
    setCurrentSentence(index);
    const audioUrl = await getAudioUrl(index);

    const audio = audioRef.current;
    // The request may have been stopped or replaced while audio was loading.
    if (!audio || playbackToken.current !== token) return;

    releaseWaiter.current?.();
    audio.src = audioUrl;

    await new Promise<void>((resolve, reject) => {
      let settled = false;
      const finish = (error?: unknown) => {
        if (settled) return;
        settled = true;
        audio.onended = null;
        audio.onerror = null;
        releaseWaiter.current = null;
        if (error === undefined) {
          resolve();
        } else {
          reject(error);
        }
      };

      // Handlers are attached before play() so a very short clip cannot end
      // before anyone is listening.
      audio.onended = () => finish();
      audio.onerror = () =>
        finish(new Error(`Audio playback failed for sentence ${index}`));
      releaseWaiter.current = () => finish();

      audio.play().catch((error: unknown) => {
        // pause() or a source change interrupts a pending play() with an
        // AbortError; that is not a failure, and resume/stop settle the wait.
        if (error instanceof DOMException && error.name === "AbortError") {
          return;
        }
        finish(error);
      });
    });
  };

  /** Plays a single sentence, cancelling any playback already in progress. */
  const playSentence = async (index: number): Promise<void> => {
    if (index < 0 || index >= sentences.length) return;

    const token = invalidatePlayback();
    setStatus("running");
    try {
      await playOne(index, token);
    } catch (error) {
      console.error("Error playing sentence:", error);
    }
    // A newer request owns the status if this one was superseded.
    if (playbackToken.current === token) setStatus("ready");
  };

  /** Jumps to the sentence at `index`; out-of-range indices are ignored. */
  const skipToSentence = (index: number) => {
    if (index < 0 || index >= sentences.length) return;
    void playSentence(index);
  };

  /**
   * Plays sentences sequentially from `index` until the end, an error, or
   * until the sequence is stopped or superseded by another request.
   */
  const playInOrder = async (index: number): Promise<void> => {
    if (index < 0 || index >= sentences.length) return;

    const token = invalidatePlayback();
    setStatus("running");

    for (let i = index; i < sentences.length; i++) {
      try {
        await playOne(i, token);
      } catch (error) {
        console.error("Error playing sentence:", error);
        break; // Stop playback on error
      }
      // stop() or a newer request took over; it manages the status itself.
      if (playbackToken.current !== token) return;
      void preloadAudio(i + 1);
    }

    if (playbackToken.current === token) setStatus("ready");
  };

  /** Pauses the audio element; an active sequence waits until `resume`. */
  const pause = () => {
    if (audioRef.current) {
      audioRef.current.pause();
      setStatus("paused");
    }
  };

  /** Resumes paused audio. Does nothing unless the status is "paused". */
  const resume = () => {
    const audio = audioRef.current;
    if (audio && status === "paused") {
      audio.play().catch((error: unknown) => {
        console.error("Error resuming playback:", error);
      });
      setStatus("running");
    }
  };

  /** Stops playback, cancels any running sequence and rewinds the audio. */
  const stop = () => {
    invalidatePlayback();
    const audio = audioRef.current;
    if (audio) {
      audio.pause();
      audio.currentTime = 0;
    }
    setStatus("ready");
  };

  // On unmount: cancel playback and release every object URL created so far.
  useEffect(() => {
    isMounted.current = true;
    const cache = audioCache.current;
    return () => {
      isMounted.current = false;
      invalidatePlayback();
      for (const url of cache.values()) {
        URL.revokeObjectURL(url);
      }
      cache.clear();
    };
  }, []);

  // Preload upcoming sentences whenever the position or the voice changes.
  // `sentences` is deliberately not a dependency: a parent passing a fresh
  // `pages` array on every render would otherwise retrigger this each render.
  useEffect(() => {
    void preloadAudio(currentSentence);
  }, [currentSentence, selectedSpeaker]);

  const value: TTSContextType = {
    sentences,
    currentSentence,
    voices,
    playSentence,
    skipToSentence,
    selectedSpeaker,
    setSelectedSpeaker,
    setCurrentSentence,
    playInOrder,
    pause,
    resume,
    stop,
    status,
  };

  return (
    <TTSContext.Provider value={value}>
      {children}
      <audio ref={audioRef} style={{ display: "none" }} />
    </TTSContext.Provider>
  );
};
|
|
|
|
/**
 * Hook giving access to the TTS state and controls.
 *
 * @returns The value supplied by the nearest TTSProvider.
 * @throws Error when no TTSProvider is present above the caller.
 */
export const useTTS = (): TTSContextType => {
  const tts = useContext(TTSContext);
  if (tts) {
    return tts;
  }
  throw new Error("useTTS must be used within a TTSProvider");
};
|