From 7f9bdee7f441635edbc9c7f2fbbd495fc4789178 Mon Sep 17 00:00:00 2001 From: Jack Merrill Date: Wed, 7 May 2025 19:30:38 -0400 Subject: [PATCH] refactor: update imports and improve TTS functionality with pause/resume support --- app/actions.ts | 2 +- app/api/process-document/route.ts | 211 +++++++++++++----- components/KokoroReader.tsx | 12 +- components/MarkdownRenderer.tsx | 4 +- components/TTSProvider.tsx | 30 ++- components/app-sidebar.tsx | 2 +- .../{nav-favorites.tsx => nav-documents.tsx} | 95 ++++++-- supabase/functions/process-document/index.ts | 2 +- 8 files changed, 272 insertions(+), 86 deletions(-) rename components/{nav-favorites.tsx => nav-documents.tsx} (53%) diff --git a/app/actions.ts b/app/actions.ts index a2f3899..6bfc46b 100644 --- a/app/actions.ts +++ b/app/actions.ts @@ -115,7 +115,7 @@ export const synthesizeTTSAction = async (data: { }, previous_text: data.previous_text, next_text: data.next_text, - model_id: "eleven_multilingual_v2", + model_id: "eleven_flash_v2_5", // use eleven_multilingual_v2 if this doesnt sound good } ); const chunks: Buffer[] = []; diff --git a/app/api/process-document/route.ts b/app/api/process-document/route.ts index 7f271af..503011c 100644 --- a/app/api/process-document/route.ts +++ b/app/api/process-document/route.ts @@ -15,7 +15,7 @@ const client = new Mistral({ apiKey }); const PROCESSING_PROMPT = ` You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format. -The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** +The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** If there is a title to the document, it should be the first heading. Any images should be included. Do not return the Markdown as a code block, only as a raw string, without any new lines. @@ -49,18 +49,36 @@ Return the final result as a text object with the following structure (without c Do not return the text object as a code block, only as a raw string. `; -async function getCitations(citationsStr: string) { +function getCitations(citationsStr: string) { try { - const citations = JSON.parse(citationsStr).citations || {}; + console.log("Parsing citations string:", citationsStr); + const citationsData = JSON.parse(citationsStr); - return + console.log("Sanitizing citations..."); + const sanitizedCitations = citationsData.citations.map((citation: any) => { + const sanitizedText = citation.text.replace( + /(https?:\/\/[^\s]+)/g, + (url: string) => encodeURI(url) + ); + return { + ...citation, + text: sanitizedText, + }; + }); + + console.log("Sanitized citations:", sanitizedCitations); + return sanitizedCitations; + } catch (err) { + console.error("Error parsing or sanitizing citations:", err); + return []; } - - } export async function POST(req: NextRequest) { + console.log("Received POST request"); + if (req.method === "OPTIONS") { + console.log("Handling OPTIONS request"); return new NextResponse(null, { headers: { ...corsHeaders, @@ -69,27 +87,16 @@ export async function POST(req: NextRequest) { }); } - const formData = await req.formData(); - const accessToken = formData.get("access_token") as string; - const refreshToken = formData.get("refresh_token") as string; - - if (!formData.has("file") || !accessToken || !refreshToken) { - return NextResponse.json( - { - error: "Missing required fields: file, access_token, or refresh_token", - }, - { status: 400 } - ); - } - - const supabase = await createClient(); - - const file = formData.get("file") as File; - const fileName = file.name; - const uuid = crypto.randomUUID(); - try { - // Authenticate the user + console.log("Parsing form data..."); + const formData = await req.formData(); + const accessToken = formData.get("access_token") as string; + const refreshToken = formData.get("refresh_token") as string; + + console.log("Creating Supabase client..."); + const supabase = await createClient(); + + console.log("Authenticating user..."); const { data: { user }, error: sessionError, @@ -99,54 +106,152 @@ export async function POST(req: NextRequest) { }); if (sessionError) { + console.error("Failed to set session:", sessionError.message); throw new Error("Failed to set session: " + sessionError.message); } if (!user) { + console.error("User not authenticated"); throw new Error("User not authenticated"); } - // Upload the file to Supabase storage - const { data: storageData, error: storageError } = await supabase.storage - .from("documents") - .upload(`${user.id}/${uuid}.pdf`, file); + var reprocessing = false; + var uuid = crypto.randomUUID(); - if (storageError) { - throw new Error("Failed to upload file: " + storageError.message); + if (formData.has("id")) { + console.log("Reprocessing document..."); + reprocessing = true; + console.log("File ID found in form data."); + + const docId = formData.get("id"); + console.log("Document ID:", docId, formData); + const { data: documentData, error: documentError } = await supabase + .from("documents") + .select("*") + .eq("id", docId!.toString()) + .single(); + + if (documentError) { + console.error("Error fetching document record:", documentError); + + throw new Error("Document record fetch failed"); + } + + if (documentData) { + await supabase + .from("documents") + .update({ + is_processing: true, + }) + .eq("id", documentData.id); + uuid = documentData.id; + } else { + console.error("Document record not found."); + + throw new Error("Document record not found"); + } + + const { data: fileData, error: fileError } = await supabase.storage + .from("documents") + .download(`${user.id}/${uuid}.pdf`); + + if (fileError) { + console.error("Error downloading file from storage:", fileError); + + throw new Error("File download failed"); + } + + console.log("File downloaded from storage:", fileData); + + formData.set("file", fileData); } - // Insert document record - const { error: docError } = await supabase.from("documents").insert({ - id: uuid, - file_name: file.name, - owner: user.id, - raw_file: storageData.id, - is_processing: true, - }); - - if (docError) { - throw new Error("Failed to insert document record: " + docError.message); + if ( + !reprocessing && + (!formData.has("file") || !accessToken || !refreshToken) + ) { + console.error( + "Missing required fields: file, access_token, or refresh_token" + ); + return NextResponse.json( + { + error: + "Missing required fields: file, access_token, or refresh_token", + }, + { status: 400 } + ); } - // Upload file to Mistral + let file = formData.get("file") as File; + const fileName = file.name; + + if (!reprocessing) { + console.log("Generated UUID for file:", uuid); + + console.log("Uploading file to Supabase storage..."); + const { data: storageData, error: storageError } = await supabase.storage + .from("documents") + .upload(`${user.id}/${uuid}.pdf`, file); + + if (storageError) { + console.error("Failed to upload file:", storageError.message); + throw new Error("Failed to upload file: " + storageError.message); + } + + console.log("Inserting document record..."); + const { error: docError } = await supabase.from("documents").insert({ + id: uuid, + file_name: file.name, + owner: user.id, + raw_file: storageData.id, + is_processing: true, + }); + + if (docError) { + console.error("Failed to insert document record:", docError.message); + throw new Error( + "Failed to insert document record: " + docError.message + ); + } + } else { + console.log("Reprocessing document..."); + + const { error: docError } = await supabase + .from("documents") + .update({ + is_processing: true, + }) + .eq("id", uuid); + if (docError) { + console.error("Error updating document record:", docError); + throw new Error("Document record update failed"); + } + console.log("Document record updated successfully."); + } + + console.log("Uploading file to Mistral..."); const uploadedPdf = await client.files.upload({ - file: { fileName, content: file }, + file: { fileName: `${uuid}.pdf`, content: file }, purpose: "ocr", }); + console.log("Getting signed URL from Mistral..."); const signedUrl = await client.files.getSignedUrl({ fileId: uploadedPdf.id, }); - // Process OCR + console.log("Processing OCR..."); const ocrResponse = await client.ocr.process({ model: "mistral-ocr-latest", document: { type: "document_url", documentUrl: signedUrl.url }, + includeImageBase64: true, }); + console.log("Processing OCR pages..."); const limit = pLimit(2); const promises = ocrResponse.pages.map((page) => limit(async () => { + console.log("Processing page:", page); const response = await client.chat.complete({ model: "mistral-small-latest", messages: [ @@ -161,32 +266,34 @@ export async function POST(req: NextRequest) { const split = response.choices[0].message.content.split("---------"); const content = split[0].trim(); const citationsStr = split[1]?.trim() || "{}"; - console.log(citationsStr); + console.log("Citations string:", citationsStr); - const citations = await getCitations(citationsStr); + const citations = getCitations(citationsStr); return { - ...page, - markdown: content, - citations, + ...page, + markdown: content, + citations, }; }) ); const results = await Promise.all(promises); - // Update document record with OCR data + console.log("Updating document record with OCR data..."); const { error: updateError } = await supabase .from("documents") .update({ ocr_data: results, is_processing: false }) .eq("id", uuid); if (updateError) { + console.error("Failed to update document record:", updateError.message); throw new Error( "Failed to update document record: " + updateError.message ); } + console.log("Document processed successfully"); return NextResponse.json({ message: "Document processed successfully", results, diff --git a/components/KokoroReader.tsx b/components/KokoroReader.tsx index f80c9a4..d92e055 100644 --- a/components/KokoroReader.tsx +++ b/components/KokoroReader.tsx @@ -23,6 +23,7 @@ export default function KokoroReader({ pages }: { pages: any[] }) { playInOrder, status, pause, + resume, } = useTTS(); const [playing, setPlaying] = useState(false); @@ -32,18 +33,17 @@ export default function KokoroReader({ pages }: { pages: any[] }) { }, [status === "ready"]); const play = () => { - if (playing) { - setPlaying(false); - return; + if (!playing && status === "paused") { + resume(); + } else { + playInOrder(currentSentence || 0); } - setPlaying(true); - playInOrder(currentSentence || 0); }; const paused = () => { setPlaying(false); - pause(); + pause(); // Call the pause function from TTSProvider }; return ( diff --git a/components/MarkdownRenderer.tsx b/components/MarkdownRenderer.tsx index 07d4c4c..aabde57 100644 --- a/components/MarkdownRenderer.tsx +++ b/components/MarkdownRenderer.tsx @@ -90,7 +90,7 @@ export default function MarkdownRenderer({

), h4: ({ node, ...props }) => ( -

+

), p: ({ node, ...props }) => (

@@ -147,7 +147,7 @@ export default function MarkdownRenderer({ {...props} /> - +

{citation.text}

diff --git a/components/TTSProvider.tsx b/components/TTSProvider.tsx index 6c2338d..803a760 100644 --- a/components/TTSProvider.tsx +++ b/components/TTSProvider.tsx @@ -29,7 +29,7 @@ interface TTSContextType { currentSentence: number; voices: any[]; selectedSpeaker: string; - status: "ready" | "running" | null; + status: "ready" | "running" | "paused" | null; setSelectedSpeaker: (speaker: string) => void; setCurrentSentence: (index: number) => void; playSentence: (index: number) => void; @@ -60,11 +60,16 @@ export const TTSProvider = ({ const [selectedSpeaker, setSelectedSpeaker] = useState("af_heart"); const [voices, setVoices] = useState([]); - const [status, setStatus] = useState<"ready" | "running" | null>("ready"); + const [status, setStatus] = useState<"ready" | "running" | "paused" | null>( + "ready" + ); // Cache for preloaded audio const audioCache = useRef>(new Map()); + // Currently processing TTS + const [processing, setProcessing] = useState([]); + // Preload audio for a range of sentences const preloadAudio = async (startIndex: number, range: number = 3) => { for ( @@ -72,9 +77,10 @@ export const TTSProvider = ({ i < Math.min(sentences.length, startIndex + range); i++ ) { - if (!audioCache.current.has(i)) { + if (!audioCache.current.has(i) && !processing.includes(i)) { console.log(`Preloading audio for sentence ${i}: ${sentences[i]}`); try { + setProcessing((prev) => [...prev, i]); // Add to processing const audioUrl = await generateTTS(sentences[i], i); audioCache.current.set(i, audioUrl); // Cache the audio URL } catch (error) { @@ -140,7 +146,15 @@ export const TTSProvider = ({ if (index < 0 || index >= sentences.length) return; setCurrentSentence(index); + // Introduce a flag to track whether playback should continue + let shouldContinue = true; + for (let i = index; i < sentences.length; i++) { + if (!shouldContinue) { + console.log("Playback stopped or paused."); + break; + } + console.log("Playing sentence:", i, sentences[i]); try { await playSentence(i); @@ -150,17 +164,22 @@ export const TTSProvider = ({ break; // Stop playback on error } } + + // Reset the playback state when done + setStatus("ready"); }; const pause = () => { if (audioRef.current) { audioRef.current.pause(); + setStatus("paused"); // Update the status to paused } }; const resume = () => { - if (audioRef.current) { + if (audioRef.current && status === "paused") { audioRef.current.play(); + setStatus("running"); // Update the status to running } }; @@ -168,7 +187,10 @@ export const TTSProvider = ({ if (audioRef.current) { audioRef.current.pause(); audioRef.current.currentTime = 0; + setStatus("ready"); // Update the status to ready } + // Reset the playback flag + shouldContinue = false; }; // Preload sentences when the current sentence changes diff --git a/components/app-sidebar.tsx b/components/app-sidebar.tsx index f4d344a..7c7f1be 100644 --- a/components/app-sidebar.tsx +++ b/components/app-sidebar.tsx @@ -17,7 +17,7 @@ import { Upload, } from "lucide-react"; -import { NavDocuments } from "@/components/nav-favorites"; +import { NavDocuments } from "@/components/nav-documents"; import { NavMain } from "@/components/nav-main"; import { NavSecondary } from "@/components/nav-secondary"; import { diff --git a/components/nav-favorites.tsx b/components/nav-documents.tsx similarity index 53% rename from components/nav-favorites.tsx rename to components/nav-documents.tsx index 85c2b3c..0d5ceb5 100644 --- a/components/nav-favorites.tsx +++ b/components/nav-documents.tsx @@ -29,9 +29,11 @@ import { } from "@/components/ui/sidebar"; import { createClient } from "@/utils/supabase/client"; import { toast } from "sonner"; +import { SSE } from "sse.js"; +import { useEffect, useState } from "react"; export function NavDocuments({ - documents, + documents: ogDocuments, }: { documents: { id: string; @@ -43,6 +45,53 @@ export function NavDocuments({ }) { const { isMobile } = useSidebar(); const supabase = createClient(); + const [documents, setDocuments] = useState(ogDocuments); + + useEffect(() => { + // watch for changes in the documents table, update the state when it changes + + const handleRecordInserted = (payload: any) => { + const newDocument = payload.new; + setDocuments((prev) => [...prev, newDocument]); + }; + const handleRecordUpdated = (payload: any) => { + const updatedDocument = payload.new; + setDocuments((prev) => + prev.map((doc) => + doc.id === updatedDocument.id ? updatedDocument : doc + ) + ); + }; + const handleRecordDeleted = (payload: any) => { + const deletedDocument = payload.old; + setDocuments((prev) => + prev.filter((doc) => doc.id !== deletedDocument.id) + ); + }; + + const subscription = supabase + .channel("documents") + .on( + "postgres_changes", + { event: "INSERT", schema: "public", table: "documents" }, + handleRecordInserted + ) + .on( + "postgres_changes", + { event: "UPDATE", schema: "public", table: "documents" }, + handleRecordUpdated + ) + .on( + "postgres_changes", + { event: "DELETE", schema: "public", table: "documents" }, + handleRecordDeleted + ) + .subscribe(); + + return () => { + subscription.unsubscribe(); + }; + }, [ogDocuments, supabase]); return ( @@ -92,19 +141,33 @@ export function NavDocuments({ session.data.session.refresh_token ); - toast.promise( - supabase.functions.invoke("process-document", { - body: data, - }), - { - loading: "Reprocessing document...", - success: "Document reprocessed successfully", - error: (err) => { - console.error("Error reprocessing document:", err); - return "Failed to reprocess document"; - }, + const eventSource = new SSE(`/api/process-document`, { + payload: data, + headers: { + apikey: process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!, + Authorization: `Bearer ${process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY}`, + }, + method: "POST", + }); + + toast.loading("Reprocessing document..."); + + eventSource.onmessage = (event) => { + const message = JSON.parse(event.data); + if (message.status === "success") { + toast.success("Document reprocessed successfully"); + eventSource.close(); + } else if (message.status === "error") { + toast.error("Failed to reprocess document"); + eventSource.close(); } - ); + }; + + eventSource.onerror = (err) => { + console.error("SSE error:", err); + toast.error("An error occurred while reprocessing"); + eventSource.close(); + }; }} > @@ -119,12 +182,6 @@ export function NavDocuments({ ))} - - - - More - - ); diff --git a/supabase/functions/process-document/index.ts b/supabase/functions/process-document/index.ts index d3d7a21..e43cdb9 100644 --- a/supabase/functions/process-document/index.ts +++ b/supabase/functions/process-document/index.ts @@ -14,7 +14,7 @@ const client = new Mistral({ const PROCESSING_PROMPT = ` You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format. -The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** +The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** If there is a title to the document, it should be the first heading. Any images should be included. Do not return the Markdown as a code block, only as a raw string, without any new lines.