refactor: update imports and improve TTS functionality with pause/resume support

This commit is contained in:
Jack Merrill 2025-05-07 19:30:38 -04:00
parent 2e2a0f28b4
commit 7f9bdee7f4
Signed by: jack
GPG Key ID: F6BFCA1B80EA6AF7
8 changed files with 272 additions and 86 deletions

View File

@ -115,7 +115,7 @@ export const synthesizeTTSAction = async (data: {
},
previous_text: data.previous_text,
next_text: data.next_text,
model_id: "eleven_multilingual_v2",
model_id: "eleven_flash_v2_5", // use eleven_multilingual_v2 if this doesnt sound good
}
);
const chunks: Buffer[] = [];

View File

@ -15,7 +15,7 @@ const client = new Mistral({ apiKey });
const PROCESSING_PROMPT = `
You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.**
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** If there is a title to the document, it should be the first heading.
Any images should be included.
Do not return the Markdown as a code block, only as a raw string, without any new lines.
@ -49,18 +49,36 @@ Return the final result as a text object with the following structure (without c
Do not return the text object as a code block, only as a raw string.
`;
/**
 * Parses the citations JSON produced by the model and percent-encodes any
 * URLs that appear inside each citation's `text`.
 *
 * The expected input is a JSON object with a `citations` array. The caller
 * falls back to the string "{}" when a page has no citations section, so a
 * missing or non-array `citations` value is treated as "no citations" rather
 * than as an error.
 *
 * @param citationsStr - Raw JSON text containing the citations object.
 * @returns The citations with URLs in `text` encoded. Returns an empty array
 *   when there are no citations or the input is not valid JSON.
 */
function getCitations(citationsStr: string): Record<string, unknown>[] {
  try {
    console.log("Parsing citations string:", citationsStr);
    const parsed: unknown = JSON.parse(citationsStr);
    const rawCitations =
      typeof parsed === "object" && parsed !== null
        ? (parsed as { citations?: unknown }).citations
        : undefined;

    if (!Array.isArray(rawCitations)) {
      return [];
    }

    console.log("Sanitizing citations...");
    const sanitizedCitations = rawCitations
      // Drop entries that are not objects instead of failing the whole page.
      .filter(
        (citation): citation is Record<string, unknown> =>
          typeof citation === "object" && citation !== null
      )
      .map((citation) => {
        // A citation without textual content has nothing to sanitize.
        if (typeof citation.text !== "string") {
          return citation;
        }
        const sanitizedText = citation.text.replace(
          /(https?:\/\/[^\s]+)/g,
          // encodeURI turns "%" into "%25"; undo that for sequences that were
          // already valid escapes so encoded URLs are not double-encoded.
          (url: string) =>
            encodeURI(url).replace(/%25([0-9A-Fa-f]{2})/g, "%$1")
        );
        return {
          ...citation,
          text: sanitizedText,
        };
      });
    console.log("Sanitized citations:", sanitizedCitations);
    return sanitizedCitations;
  } catch (err) {
    console.error("Error parsing or sanitizing citations:", err);
    return [];
  }
}
export async function POST(req: NextRequest) {
console.log("Received POST request");
if (req.method === "OPTIONS") {
console.log("Handling OPTIONS request");
return new NextResponse(null, {
headers: {
...corsHeaders,
@ -69,27 +87,16 @@ export async function POST(req: NextRequest) {
});
}
try {
console.log("Parsing form data...");
const formData = await req.formData();
const accessToken = formData.get("access_token") as string;
const refreshToken = formData.get("refresh_token") as string;
if (!formData.has("file") || !accessToken || !refreshToken) {
return NextResponse.json(
{
error: "Missing required fields: file, access_token, or refresh_token",
},
{ status: 400 }
);
}
console.log("Creating Supabase client...");
const supabase = await createClient();
const file = formData.get("file") as File;
const fileName = file.name;
const uuid = crypto.randomUUID();
try {
// Authenticate the user
console.log("Authenticating user...");
const {
data: { user },
error: sessionError,
@ -99,23 +106,99 @@ export async function POST(req: NextRequest) {
});
if (sessionError) {
console.error("Failed to set session:", sessionError.message);
throw new Error("Failed to set session: " + sessionError.message);
}
if (!user) {
console.error("User not authenticated");
throw new Error("User not authenticated");
}
// Upload the file to Supabase storage
var reprocessing = false;
var uuid = crypto.randomUUID();
if (formData.has("id")) {
console.log("Reprocessing document...");
reprocessing = true;
console.log("File ID found in form data.");
const docId = formData.get("id");
console.log("Document ID:", docId, formData);
const { data: documentData, error: documentError } = await supabase
.from("documents")
.select("*")
.eq("id", docId!.toString())
.single();
if (documentError) {
console.error("Error fetching document record:", documentError);
throw new Error("Document record fetch failed");
}
if (documentData) {
await supabase
.from("documents")
.update({
is_processing: true,
})
.eq("id", documentData.id);
uuid = documentData.id;
} else {
console.error("Document record not found.");
throw new Error("Document record not found");
}
const { data: fileData, error: fileError } = await supabase.storage
.from("documents")
.download(`${user.id}/${uuid}.pdf`);
if (fileError) {
console.error("Error downloading file from storage:", fileError);
throw new Error("File download failed");
}
console.log("File downloaded from storage:", fileData);
formData.set("file", fileData);
}
if (
!reprocessing &&
(!formData.has("file") || !accessToken || !refreshToken)
) {
console.error(
"Missing required fields: file, access_token, or refresh_token"
);
return NextResponse.json(
{
error:
"Missing required fields: file, access_token, or refresh_token",
},
{ status: 400 }
);
}
let file = formData.get("file") as File;
const fileName = file.name;
if (!reprocessing) {
console.log("Generated UUID for file:", uuid);
console.log("Uploading file to Supabase storage...");
const { data: storageData, error: storageError } = await supabase.storage
.from("documents")
.upload(`${user.id}/${uuid}.pdf`, file);
if (storageError) {
console.error("Failed to upload file:", storageError.message);
throw new Error("Failed to upload file: " + storageError.message);
}
// Insert document record
console.log("Inserting document record...");
const { error: docError } = await supabase.from("documents").insert({
id: uuid,
file_name: file.name,
@ -125,28 +208,50 @@ export async function POST(req: NextRequest) {
});
if (docError) {
throw new Error("Failed to insert document record: " + docError.message);
console.error("Failed to insert document record:", docError.message);
throw new Error(
"Failed to insert document record: " + docError.message
);
}
} else {
console.log("Reprocessing document...");
const { error: docError } = await supabase
.from("documents")
.update({
is_processing: true,
})
.eq("id", uuid);
if (docError) {
console.error("Error updating document record:", docError);
throw new Error("Document record update failed");
}
console.log("Document record updated successfully.");
}
// Upload file to Mistral
console.log("Uploading file to Mistral...");
const uploadedPdf = await client.files.upload({
file: { fileName, content: file },
file: { fileName: `${uuid}.pdf`, content: file },
purpose: "ocr",
});
console.log("Getting signed URL from Mistral...");
const signedUrl = await client.files.getSignedUrl({
fileId: uploadedPdf.id,
});
// Process OCR
console.log("Processing OCR...");
const ocrResponse = await client.ocr.process({
model: "mistral-ocr-latest",
document: { type: "document_url", documentUrl: signedUrl.url },
includeImageBase64: true,
});
console.log("Processing OCR pages...");
const limit = pLimit(2);
const promises = ocrResponse.pages.map((page) =>
limit(async () => {
console.log("Processing page:", page);
const response = await client.chat.complete({
model: "mistral-small-latest",
messages: [
@ -161,9 +266,9 @@ export async function POST(req: NextRequest) {
const split = response.choices[0].message.content.split("---------");
const content = split[0].trim();
const citationsStr = split[1]?.trim() || "{}";
console.log(citationsStr);
console.log("Citations string:", citationsStr);
const citations = await getCitations(citationsStr);
const citations = getCitations(citationsStr);
return {
...page,
@ -175,18 +280,20 @@ export async function POST(req: NextRequest) {
const results = await Promise.all(promises);
// Update document record with OCR data
console.log("Updating document record with OCR data...");
const { error: updateError } = await supabase
.from("documents")
.update({ ocr_data: results, is_processing: false })
.eq("id", uuid);
if (updateError) {
console.error("Failed to update document record:", updateError.message);
throw new Error(
"Failed to update document record: " + updateError.message
);
}
console.log("Document processed successfully");
return NextResponse.json({
message: "Document processed successfully",
results,

View File

@ -23,6 +23,7 @@ export default function KokoroReader({ pages }: { pages: any[] }) {
playInOrder,
status,
pause,
resume,
} = useTTS();
const [playing, setPlaying] = useState(false);
@ -32,18 +33,17 @@ export default function KokoroReader({ pages }: { pages: any[] }) {
}, [status === "ready"]);
// Play-button handler. When playback is not active and the TTS status is
// "paused" it calls resume(); otherwise it starts playInOrder from
// currentSentence (or sentence 0 when that value is falsy).
// NOTE(review): this span appears to show both the pre- and post-change lines
// of the commit — the early-return `if (playing)` guard and the first
// setPlaying(true) look like the removed side. Confirm which lines belong to
// the final version before relying on the toggle behavior.
const play = () => {
if (playing) {
setPlaying(false);
return;
}
setPlaying(true);
if (!playing && status === "paused") {
resume();
} else {
playInOrder(currentSentence || 0);
}
setPlaying(true);
};
// Pause-button handler: clears the local `playing` flag and pauses the audio
// through the TTS context.
// NOTE(review): pause() appears twice here, presumably the removed and added
// versions of the same line — confirm only one call is intended.
const paused = () => {
setPlaying(false);
pause();
pause(); // Call the pause function from TTSProvider
};
return (

View File

@ -90,7 +90,7 @@ export default function MarkdownRenderer({
<h3 className="text-lg font-medium mb-2 text-gray-300" {...props} />
),
h4: ({ node, ...props }) => (
<h4 className="text-lg font-medium mb-2 text-gray-300" {...props} />
<h4 className="text-lg font-bold mb-2 text-gray-300" {...props} />
),
p: ({ node, ...props }) => (
<p className="leading-7 text-gray-200" {...props} />
@ -147,7 +147,7 @@ export default function MarkdownRenderer({
{...props}
/>
</PopoverTrigger>
<PopoverContent className="w-56 overflow-hidden rounded-lg p-0">
<PopoverContent className="w-auto max-w-3xl bg-gray-900 overflow-hidden rounded-lg p-0">
<div className="p-4">
<p>{citation.text}</p>
</div>

View File

@ -29,7 +29,7 @@ interface TTSContextType {
currentSentence: number;
voices: any[];
selectedSpeaker: string;
status: "ready" | "running" | null;
status: "ready" | "running" | "paused" | null;
setSelectedSpeaker: (speaker: string) => void;
setCurrentSentence: (index: number) => void;
playSentence: (index: number) => void;
@ -60,11 +60,16 @@ export const TTSProvider = ({
const [selectedSpeaker, setSelectedSpeaker] = useState("af_heart");
const [voices, setVoices] = useState<any[]>([]);
const [status, setStatus] = useState<"ready" | "running" | null>("ready");
const [status, setStatus] = useState<"ready" | "running" | "paused" | null>(
"ready"
);
// Cache for preloaded audio
const audioCache = useRef<Map<number, string>>(new Map());
// Currently processing TTS
const [processing, setProcessing] = useState<number[]>([]);
// Preload audio for a range of sentences
const preloadAudio = async (startIndex: number, range: number = 3) => {
for (
@ -72,9 +77,10 @@ export const TTSProvider = ({
i < Math.min(sentences.length, startIndex + range);
i++
) {
if (!audioCache.current.has(i)) {
if (!audioCache.current.has(i) && !processing.includes(i)) {
console.log(`Preloading audio for sentence ${i}: ${sentences[i]}`);
try {
setProcessing((prev) => [...prev, i]); // Add to processing
const audioUrl = await generateTTS(sentences[i], i);
audioCache.current.set(i, audioUrl); // Cache the audio URL
} catch (error) {
@ -140,7 +146,15 @@ export const TTSProvider = ({
if (index < 0 || index >= sentences.length) return;
setCurrentSentence(index);
// Introduce a flag to track whether playback should continue
let shouldContinue = true;
for (let i = index; i < sentences.length; i++) {
if (!shouldContinue) {
console.log("Playback stopped or paused.");
break;
}
console.log("Playing sentence:", i, sentences[i]);
try {
await playSentence(i);
@ -150,17 +164,22 @@ export const TTSProvider = ({
break; // Stop playback on error
}
}
// Reset the playback state when done
setStatus("ready");
};
// Pauses the active audio element, if there is one, and records the paused
// state so that a later resume() knows playback can be continued.
const pause = () => {
  const audio = audioRef.current;
  if (!audio) {
    return;
  }
  audio.pause();
  setStatus("paused"); // Update the status to paused
};
// Continues playback of the current audio element, but only when playback
// was previously paused. If the element refuses to play, the status is rolled
// back to "paused" so the UI does not claim audio is running.
const resume = () => {
  const audio = audioRef.current;
  if (audio && status === "paused") {
    setStatus("running"); // Update the status to running
    // play() returns a promise in current browsers (and undefined in very old
    // ones); wrapping it handles both and prevents an unhandled rejection.
    Promise.resolve(audio.play()).catch((err: unknown) => {
      console.error("Failed to resume playback:", err);
      setStatus("paused");
    });
  }
};
@ -168,7 +187,10 @@ export const TTSProvider = ({
if (audioRef.current) {
audioRef.current.pause();
audioRef.current.currentTime = 0;
setStatus("ready"); // Update the status to ready
}
// Reset the playback flag
shouldContinue = false;
};
// Preload sentences when the current sentence changes

View File

@ -17,7 +17,7 @@ import {
Upload,
} from "lucide-react";
import { NavDocuments } from "@/components/nav-favorites";
import { NavDocuments } from "@/components/nav-documents";
import { NavMain } from "@/components/nav-main";
import { NavSecondary } from "@/components/nav-secondary";
import {

View File

@ -29,9 +29,11 @@ import {
} from "@/components/ui/sidebar";
import { createClient } from "@/utils/supabase/client";
import { toast } from "sonner";
import { SSE } from "sse.js";
import { useEffect, useState } from "react";
export function NavDocuments({
documents,
documents: ogDocuments,
}: {
documents: {
id: string;
@ -43,6 +45,53 @@ export function NavDocuments({
}) {
const { isMobile } = useSidebar();
const supabase = createClient();
const [documents, setDocuments] = useState(ogDocuments);
// Subscribes to Postgres change events on the public `documents` table and
// mirrors inserts, updates and deletes into the local `documents` state. The
// subscription is removed in the effect cleanup.
// NOTE(review): `supabase` comes from createClient() in the component body and
// is listed as a dependency; if createClient returns a new object per render
// this effect would re-subscribe on every render — confirm it is memoized.
// NOTE(review): local state is seeded from `ogDocuments` via useState only;
// this effect does not copy later `ogDocuments` values into state.
useEffect(() => {
// Handlers for each kind of row change delivered by the realtime channel.
// Appends the newly inserted row (payload.new) to the list.
const handleRecordInserted = (payload: any) => {
const newDocument = payload.new;
setDocuments((prev) => [...prev, newDocument]);
};
// Replaces the entry whose id matches the updated row (payload.new).
const handleRecordUpdated = (payload: any) => {
const updatedDocument = payload.new;
setDocuments((prev) =>
prev.map((doc) =>
doc.id === updatedDocument.id ? updatedDocument : doc
)
);
};
// Removes the entry whose id matches the deleted row (payload.old).
const handleRecordDeleted = (payload: any) => {
const deletedDocument = payload.old;
setDocuments((prev) =>
prev.filter((doc) => doc.id !== deletedDocument.id)
);
};
// One channel named "documents" with a listener per event type.
const subscription = supabase
.channel("documents")
.on(
"postgres_changes",
{ event: "INSERT", schema: "public", table: "documents" },
handleRecordInserted
)
.on(
"postgres_changes",
{ event: "UPDATE", schema: "public", table: "documents" },
handleRecordUpdated
)
.on(
"postgres_changes",
{ event: "DELETE", schema: "public", table: "documents" },
handleRecordDeleted
)
.subscribe();
// Cleanup: stop listening when the effect re-runs or the component unmounts.
return () => {
subscription.unsubscribe();
};
}, [ogDocuments, supabase]);
return (
<SidebarGroup className="group-data-[collapsible=icon]:hidden">
@ -92,19 +141,33 @@ export function NavDocuments({
session.data.session.refresh_token
);
toast.promise(
supabase.functions.invoke("process-document", {
body: data,
}),
{
loading: "Reprocessing document...",
success: "Document reprocessed successfully",
error: (err) => {
console.error("Error reprocessing document:", err);
return "Failed to reprocess document";
const eventSource = new SSE(`/api/process-document`, {
payload: data,
headers: {
apikey: process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!,
Authorization: `Bearer ${process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY}`,
},
method: "POST",
});
toast.loading("Reprocessing document...");
eventSource.onmessage = (event) => {
const message = JSON.parse(event.data);
if (message.status === "success") {
toast.success("Document reprocessed successfully");
eventSource.close();
} else if (message.status === "error") {
toast.error("Failed to reprocess document");
eventSource.close();
}
);
};
eventSource.onerror = (err) => {
console.error("SSE error:", err);
toast.error("An error occurred while reprocessing");
eventSource.close();
};
}}
>
<RefreshCw className="text-muted-foreground" />
@ -119,12 +182,6 @@ export function NavDocuments({
</DropdownMenu>
</SidebarMenuItem>
))}
<SidebarMenuItem>
<SidebarMenuButton className="text-sidebar-foreground/70">
<MoreHorizontal />
<span>More</span>
</SidebarMenuButton>
</SidebarMenuItem>
</SidebarMenu>
</SidebarGroup>
);

View File

@ -14,7 +14,7 @@ const client = new Mistral({
const PROCESSING_PROMPT = `
You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.**
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** If there is a title to the document, it should be the first heading.
Any images should be included.
Do not return the Markdown as a code block, only as a raw string, without any new lines.