From 7f9bdee7f441635edbc9c7f2fbbd495fc4789178 Mon Sep 17 00:00:00 2001
From: Jack Merrill <me@jackmerrill.com>
Date: Wed, 7 May 2025 19:30:38 -0400
Subject: [PATCH] refactor: update imports and improve TTS functionality with
 pause/resume support

---
 app/actions.ts                                |   2 +-
 app/api/process-document/route.ts             | 211 +++++++++++++-----
 components/KokoroReader.tsx                   |  12 +-
 components/MarkdownRenderer.tsx               |   4 +-
 components/TTSProvider.tsx                    |  30 ++-
 components/app-sidebar.tsx                    |   2 +-
 .../{nav-favorites.tsx => nav-documents.tsx}  |  95 ++++++--
 supabase/functions/process-document/index.ts  |   2 +-
 8 files changed, 272 insertions(+), 86 deletions(-)
 rename components/{nav-favorites.tsx => nav-documents.tsx} (53%)

diff --git a/app/actions.ts b/app/actions.ts
index a2f3899..6bfc46b 100644
--- a/app/actions.ts
+++ b/app/actions.ts
@@ -115,7 +115,7 @@ export const synthesizeTTSAction = async (data: {
         },
         previous_text: data.previous_text,
         next_text: data.next_text,
-        model_id: "eleven_multilingual_v2",
+        model_id: "eleven_flash_v2_5", // use eleven_multilingual_v2 if this doesn't sound good
       }
     );
     const chunks: Buffer[] = [];
diff --git a/app/api/process-document/route.ts b/app/api/process-document/route.ts
index 7f271af..503011c 100644
--- a/app/api/process-document/route.ts
+++ b/app/api/process-document/route.ts
@@ -15,7 +15,7 @@ const client = new Mistral({ apiKey });
 const PROCESSING_PROMPT = `
 You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
 
-The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.**
+The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** If there is a title to the document, it should be the first heading.
 Any images should be included.
 Do not return the Markdown as a code block, only as a raw string, without any new lines.
 
@@ -49,18 +49,36 @@ Return the final result as a text object with the following structure (without c
 Do not return the text object as a code block, only as a raw string.
 `;
 
-async function getCitations(citationsStr: string) {
+function getCitations(citationsStr: string) {
   try {
-    const citations = JSON.parse(citationsStr).citations || {};
+    console.log("Parsing citations string:", citationsStr);
+    const citationsData = JSON.parse(citationsStr);
 
-    return 
+    console.log("Sanitizing citations...");
+    const sanitizedCitations = citationsData.citations.map((citation: any) => {
+      const sanitizedText = citation.text.replace(
+        /(https?:\/\/[^\s]+)/g,
+        (url: string) => encodeURI(url)
+      );
+      return {
+        ...citation,
+        text: sanitizedText,
+      };
+    });
+
+    console.log("Sanitized citations:", sanitizedCitations);
+    return sanitizedCitations;
+  } catch (err) {
+    console.error("Error parsing or sanitizing citations:", err);
+    return [];
   }
-
-
 }
 
 export async function POST(req: NextRequest) {
+  console.log("Received POST request");
+
   if (req.method === "OPTIONS") {
+    console.log("Handling OPTIONS request");
     return new NextResponse(null, {
       headers: {
         ...corsHeaders,
@@ -69,27 +87,16 @@ export async function POST(req: NextRequest) {
     });
   }
 
-  const formData = await req.formData();
-  const accessToken = formData.get("access_token") as string;
-  const refreshToken = formData.get("refresh_token") as string;
-
-  if (!formData.has("file") || !accessToken || !refreshToken) {
-    return NextResponse.json(
-      {
-        error: "Missing required fields: file, access_token, or refresh_token",
-      },
-      { status: 400 }
-    );
-  }
-
-  const supabase = await createClient();
-
-  const file = formData.get("file") as File;
-  const fileName = file.name;
-  const uuid = crypto.randomUUID();
-
   try {
-    // Authenticate the user
+    console.log("Parsing form data...");
+    const formData = await req.formData();
+    const accessToken = formData.get("access_token") as string;
+    const refreshToken = formData.get("refresh_token") as string;
+
+    console.log("Creating Supabase client...");
+    const supabase = await createClient();
+
+    console.log("Authenticating user...");
     const {
       data: { user },
       error: sessionError,
@@ -99,54 +106,152 @@ export async function POST(req: NextRequest) {
     });
 
     if (sessionError) {
+      console.error("Failed to set session:", sessionError.message);
       throw new Error("Failed to set session: " + sessionError.message);
     }
 
     if (!user) {
+      console.error("User not authenticated");
       throw new Error("User not authenticated");
     }
 
-    // Upload the file to Supabase storage
-    const { data: storageData, error: storageError } = await supabase.storage
-      .from("documents")
-      .upload(`${user.id}/${uuid}.pdf`, file);
+    var reprocessing = false;
+    var uuid = crypto.randomUUID();
 
-    if (storageError) {
-      throw new Error("Failed to upload file: " + storageError.message);
+    if (formData.has("id")) {
+      console.log("Reprocessing document...");
+      reprocessing = true;
+      console.log("File ID found in form data.");
+
+      const docId = formData.get("id");
+      console.log("Document ID:", docId, formData);
+      const { data: documentData, error: documentError } = await supabase
+        .from("documents")
+        .select("*")
+        .eq("id", docId!.toString())
+        .single();
+
+      if (documentError) {
+        console.error("Error fetching document record:", documentError);
+
+        throw new Error("Document record fetch failed");
+      }
+
+      if (documentData) {
+        await supabase
+          .from("documents")
+          .update({
+            is_processing: true,
+          })
+          .eq("id", documentData.id);
+        uuid = documentData.id;
+      } else {
+        console.error("Document record not found.");
+
+        throw new Error("Document record not found");
+      }
+
+      const { data: fileData, error: fileError } = await supabase.storage
+        .from("documents")
+        .download(`${user.id}/${uuid}.pdf`);
+
+      if (fileError) {
+        console.error("Error downloading file from storage:", fileError);
+
+        throw new Error("File download failed");
+      }
+
+      console.log("File downloaded from storage:", fileData);
+
+      formData.set("file", fileData);
     }
 
-    // Insert document record
-    const { error: docError } = await supabase.from("documents").insert({
-      id: uuid,
-      file_name: file.name,
-      owner: user.id,
-      raw_file: storageData.id,
-      is_processing: true,
-    });
-
-    if (docError) {
-      throw new Error("Failed to insert document record: " + docError.message);
+    if (
+      !reprocessing &&
+      (!formData.has("file") || !accessToken || !refreshToken)
+    ) {
+      console.error(
+        "Missing required fields: file, access_token, or refresh_token"
+      );
+      return NextResponse.json(
+        {
+          error:
+            "Missing required fields: file, access_token, or refresh_token",
+        },
+        { status: 400 }
+      );
     }
 
-    // Upload file to Mistral
+    let file = formData.get("file") as File;
+    const fileName = file.name;
+
+    if (!reprocessing) {
+      console.log("Generated UUID for file:", uuid);
+
+      console.log("Uploading file to Supabase storage...");
+      const { data: storageData, error: storageError } = await supabase.storage
+        .from("documents")
+        .upload(`${user.id}/${uuid}.pdf`, file);
+
+      if (storageError) {
+        console.error("Failed to upload file:", storageError.message);
+        throw new Error("Failed to upload file: " + storageError.message);
+      }
+
+      console.log("Inserting document record...");
+      const { error: docError } = await supabase.from("documents").insert({
+        id: uuid,
+        file_name: file.name,
+        owner: user.id,
+        raw_file: storageData.id,
+        is_processing: true,
+      });
+
+      if (docError) {
+        console.error("Failed to insert document record:", docError.message);
+        throw new Error(
+          "Failed to insert document record: " + docError.message
+        );
+      }
+    } else {
+      console.log("Reprocessing document...");
+
+      const { error: docError } = await supabase
+        .from("documents")
+        .update({
+          is_processing: true,
+        })
+        .eq("id", uuid);
+      if (docError) {
+        console.error("Error updating document record:", docError);
+        throw new Error("Document record update failed");
+      }
+      console.log("Document record updated successfully.");
+    }
+
+    console.log("Uploading file to Mistral...");
     const uploadedPdf = await client.files.upload({
-      file: { fileName, content: file },
+      file: { fileName: `${uuid}.pdf`, content: file },
       purpose: "ocr",
     });
 
+    console.log("Getting signed URL from Mistral...");
     const signedUrl = await client.files.getSignedUrl({
       fileId: uploadedPdf.id,
     });
 
-    // Process OCR
+    console.log("Processing OCR...");
     const ocrResponse = await client.ocr.process({
       model: "mistral-ocr-latest",
       document: { type: "document_url", documentUrl: signedUrl.url },
+      includeImageBase64: true,
     });
 
+    console.log("Processing OCR pages...");
     const limit = pLimit(2);
     const promises = ocrResponse.pages.map((page) =>
       limit(async () => {
+        console.log("Processing page:", page);
         const response = await client.chat.complete({
           model: "mistral-small-latest",
           messages: [
@@ -161,32 +266,34 @@ export async function POST(req: NextRequest) {
         const split = response.choices[0].message.content.split("---------");
         const content = split[0].trim();
         const citationsStr = split[1]?.trim() || "{}";
-        console.log(citationsStr);
+        console.log("Citations string:", citationsStr);
 
-        const citations = await getCitations(citationsStr);
+        const citations = getCitations(citationsStr);
 
         return {
-            ...page,
-            markdown: content,
-            citations,
+          ...page,
+          markdown: content,
+          citations,
         };
       })
     );
 
     const results = await Promise.all(promises);
 
-    // Update document record with OCR data
+    console.log("Updating document record with OCR data...");
     const { error: updateError } = await supabase
       .from("documents")
       .update({ ocr_data: results, is_processing: false })
       .eq("id", uuid);
 
     if (updateError) {
+      console.error("Failed to update document record:", updateError.message);
       throw new Error(
         "Failed to update document record: " + updateError.message
       );
     }
 
+    console.log("Document processed successfully");
     return NextResponse.json({
       message: "Document processed successfully",
       results,
diff --git a/components/KokoroReader.tsx b/components/KokoroReader.tsx
index f80c9a4..d92e055 100644
--- a/components/KokoroReader.tsx
+++ b/components/KokoroReader.tsx
@@ -23,6 +23,7 @@ export default function KokoroReader({ pages }: { pages: any[] }) {
     playInOrder,
     status,
     pause,
+    resume,
   } = useTTS();
 
   const [playing, setPlaying] = useState(false);
@@ -32,18 +33,17 @@ export default function KokoroReader({ pages }: { pages: any[] }) {
   }, [status === "ready"]);
 
   const play = () => {
-    if (playing) {
-      setPlaying(false);
-      return;
+    if (!playing && status === "paused") {
+      resume();
+    } else {
+      playInOrder(currentSentence || 0);
     }
-
     setPlaying(true);
-    playInOrder(currentSentence || 0);
   };
 
   const paused = () => {
     setPlaying(false);
-    pause();
+    pause(); // Call the pause function from TTSProvider
   };
 
   return (
diff --git a/components/MarkdownRenderer.tsx b/components/MarkdownRenderer.tsx
index 07d4c4c..aabde57 100644
--- a/components/MarkdownRenderer.tsx
+++ b/components/MarkdownRenderer.tsx
@@ -90,7 +90,7 @@ export default function MarkdownRenderer({
       <h3 className="text-lg font-medium mb-2 text-gray-300" {...props} />
     ),
     h4: ({ node, ...props }) => (
-      <h4 className="text-lg font-medium mb-2 text-gray-300" {...props} />
+      <h4 className="text-lg font-bold mb-2 text-gray-300" {...props} />
     ),
     p: ({ node, ...props }) => (
       <p className="leading-7 text-gray-200" {...props} />
@@ -147,7 +147,7 @@ export default function MarkdownRenderer({
               {...props}
             />
           </PopoverTrigger>
-          <PopoverContent className="w-56 overflow-hidden rounded-lg p-0">
+          <PopoverContent className="w-auto max-w-3xl bg-gray-900 overflow-hidden rounded-lg p-0">
             <div className="p-4">
               <p>{citation.text}</p>
             </div>
diff --git a/components/TTSProvider.tsx b/components/TTSProvider.tsx
index 6c2338d..803a760 100644
--- a/components/TTSProvider.tsx
+++ b/components/TTSProvider.tsx
@@ -29,7 +29,7 @@ interface TTSContextType {
   currentSentence: number;
   voices: any[];
   selectedSpeaker: string;
-  status: "ready" | "running" | null;
+  status: "ready" | "running" | "paused" | null;
   setSelectedSpeaker: (speaker: string) => void;
   setCurrentSentence: (index: number) => void;
   playSentence: (index: number) => void;
@@ -60,11 +60,16 @@ export const TTSProvider = ({
 
   const [selectedSpeaker, setSelectedSpeaker] = useState("af_heart");
   const [voices, setVoices] = useState<any[]>([]);
-  const [status, setStatus] = useState<"ready" | "running" | null>("ready");
+  const [status, setStatus] = useState<"ready" | "running" | "paused" | null>(
+    "ready"
+  );
 
   // Cache for preloaded audio
   const audioCache = useRef<Map<number, string>>(new Map());
 
+  // Indices of sentences whose TTS audio generation has already been started
+  const [processing, setProcessing] = useState<number[]>([]);
+
   // Preload audio for a range of sentences
   const preloadAudio = async (startIndex: number, range: number = 3) => {
     for (
@@ -72,9 +77,10 @@ export const TTSProvider = ({
       i < Math.min(sentences.length, startIndex + range);
       i++
     ) {
-      if (!audioCache.current.has(i)) {
+      if (!audioCache.current.has(i) && !processing.includes(i)) {
         console.log(`Preloading audio for sentence ${i}: ${sentences[i]}`);
         try {
+          setProcessing((prev) => [...prev, i]); // Add to processing
           const audioUrl = await generateTTS(sentences[i], i);
           audioCache.current.set(i, audioUrl); // Cache the audio URL
         } catch (error) {
@@ -140,7 +146,15 @@ export const TTSProvider = ({
     if (index < 0 || index >= sentences.length) return;
     setCurrentSentence(index);
 
+    // Introduce a flag to track whether playback should continue
+    let shouldContinue = true;
+
     for (let i = index; i < sentences.length; i++) {
+      if (!shouldContinue) {
+        console.log("Playback stopped or paused.");
+        break;
+      }
+
       console.log("Playing sentence:", i, sentences[i]);
       try {
         await playSentence(i);
@@ -150,17 +164,22 @@ export const TTSProvider = ({
         break; // Stop playback on error
       }
     }
+
+    // Reset the playback state when done
+    setStatus("ready");
   };
 
   const pause = () => {
     if (audioRef.current) {
       audioRef.current.pause();
+      setStatus("paused"); // Update the status to paused
     }
   };
 
   const resume = () => {
-    if (audioRef.current) {
+    if (audioRef.current && status === "paused") {
       audioRef.current.play();
+      setStatus("running"); // Update the status to running
     }
   };
 
@@ -168,7 +187,10 @@ export const TTSProvider = ({
     if (audioRef.current) {
       audioRef.current.pause();
       audioRef.current.currentTime = 0;
+      setStatus("ready"); // Update the status to ready
     }
+    // NOTE(review): shouldContinue is declared with `let` inside playInOrder; confirm it is in scope here
+    shouldContinue = false;
   };
 
   // Preload sentences when the current sentence changes
diff --git a/components/app-sidebar.tsx b/components/app-sidebar.tsx
index f4d344a..7c7f1be 100644
--- a/components/app-sidebar.tsx
+++ b/components/app-sidebar.tsx
@@ -17,7 +17,7 @@ import {
   Upload,
 } from "lucide-react";
 
-import { NavDocuments } from "@/components/nav-favorites";
+import { NavDocuments } from "@/components/nav-documents";
 import { NavMain } from "@/components/nav-main";
 import { NavSecondary } from "@/components/nav-secondary";
 import {
diff --git a/components/nav-favorites.tsx b/components/nav-documents.tsx
similarity index 53%
rename from components/nav-favorites.tsx
rename to components/nav-documents.tsx
index 85c2b3c..0d5ceb5 100644
--- a/components/nav-favorites.tsx
+++ b/components/nav-documents.tsx
@@ -29,9 +29,11 @@ import {
 } from "@/components/ui/sidebar";
 import { createClient } from "@/utils/supabase/client";
 import { toast } from "sonner";
+import { SSE } from "sse.js";
+import { useEffect, useState } from "react";
 
 export function NavDocuments({
-  documents,
+  documents: ogDocuments,
 }: {
   documents: {
     id: string;
@@ -43,6 +45,53 @@ export function NavDocuments({
 }) {
   const { isMobile } = useSidebar();
   const supabase = createClient();
+  const [documents, setDocuments] = useState(ogDocuments);
+
+  useEffect(() => {
+    // watch for changes in the documents table, update the state when it changes
+
+    const handleRecordInserted = (payload: any) => {
+      const newDocument = payload.new;
+      setDocuments((prev) => [...prev, newDocument]);
+    };
+    const handleRecordUpdated = (payload: any) => {
+      const updatedDocument = payload.new;
+      setDocuments((prev) =>
+        prev.map((doc) =>
+          doc.id === updatedDocument.id ? updatedDocument : doc
+        )
+      );
+    };
+    const handleRecordDeleted = (payload: any) => {
+      const deletedDocument = payload.old;
+      setDocuments((prev) =>
+        prev.filter((doc) => doc.id !== deletedDocument.id)
+      );
+    };
+
+    const subscription = supabase
+      .channel("documents")
+      .on(
+        "postgres_changes",
+        { event: "INSERT", schema: "public", table: "documents" },
+        handleRecordInserted
+      )
+      .on(
+        "postgres_changes",
+        { event: "UPDATE", schema: "public", table: "documents" },
+        handleRecordUpdated
+      )
+      .on(
+        "postgres_changes",
+        { event: "DELETE", schema: "public", table: "documents" },
+        handleRecordDeleted
+      )
+      .subscribe();
+
+    return () => {
+      subscription.unsubscribe();
+    };
+  }, [ogDocuments, supabase]);
 
   return (
     <SidebarGroup className="group-data-[collapsible=icon]:hidden">
@@ -92,19 +141,33 @@ export function NavDocuments({
                       session.data.session.refresh_token
                     );
 
-                    toast.promise(
-                      supabase.functions.invoke("process-document", {
-                        body: data,
-                      }),
-                      {
-                        loading: "Reprocessing document...",
-                        success: "Document reprocessed successfully",
-                        error: (err) => {
-                          console.error("Error reprocessing document:", err);
-                          return "Failed to reprocess document";
-                        },
+                    const eventSource = new SSE(`/api/process-document`, {
+                      payload: data,
+                      headers: {
+                        apikey: process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!,
+                        Authorization: `Bearer ${process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY}`,
+                      },
+                      method: "POST",
+                    });
+
+                    toast.loading("Reprocessing document...");
+
+                    eventSource.onmessage = (event) => {
+                      const message = JSON.parse(event.data);
+                      if (message.status === "success") {
+                        toast.success("Document reprocessed successfully");
+                        eventSource.close();
+                      } else if (message.status === "error") {
+                        toast.error("Failed to reprocess document");
+                        eventSource.close();
                       }
-                    );
+                    };
+
+                    eventSource.onerror = (err) => {
+                      console.error("SSE error:", err);
+                      toast.error("An error occurred while reprocessing");
+                      eventSource.close();
+                    };
                   }}
                 >
                   <RefreshCw className="text-muted-foreground" />
@@ -119,12 +182,6 @@ export function NavDocuments({
             </DropdownMenu>
           </SidebarMenuItem>
         ))}
-        <SidebarMenuItem>
-          <SidebarMenuButton className="text-sidebar-foreground/70">
-            <MoreHorizontal />
-            <span>More</span>
-          </SidebarMenuButton>
-        </SidebarMenuItem>
       </SidebarMenu>
     </SidebarGroup>
   );
diff --git a/supabase/functions/process-document/index.ts b/supabase/functions/process-document/index.ts
index d3d7a21..e43cdb9 100644
--- a/supabase/functions/process-document/index.ts
+++ b/supabase/functions/process-document/index.ts
@@ -14,7 +14,7 @@ const client = new Mistral({
 const PROCESSING_PROMPT = `
 You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
 
-The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.**
+The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** If there is a title to the document, it should be the first heading.
 Any images should be included.
 Do not return the Markdown as a code block, only as a raw string, without any new lines.