diff --git a/.vscode/extensions.json b/.vscode/extensions.json
new file mode 100644
index 0000000..74baffc
--- /dev/null
+++ b/.vscode/extensions.json
@@ -0,0 +1,3 @@
+{
+  "recommendations": ["denoland.vscode-deno"]
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..0dccffe
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,22 @@
+{
+  "deno.enablePaths": ["supabase/functions"],
+  "deno.lint": true,
+  "deno.unstable": [
+    "bare-node-builtins",
+    "byonm",
+    "sloppy-imports",
+    "unsafe-proto",
+    "webgpu",
+    "broadcast-channel",
+    "worker-options",
+    "cron",
+    "kv",
+    "ffi",
+    "fs",
+    "http",
+    "net"
+  ],
+  "[typescript]": {
+    "editor.defaultFormatter": "esbenp.prettier-vscode"
+  }
+}
diff --git a/app/dashboard/documents/[id]/page.tsx b/app/dashboard/documents/[id]/page.tsx
index 1884fd7..9e5b507 100644
--- a/app/dashboard/documents/[id]/page.tsx
+++ b/app/dashboard/documents/[id]/page.tsx
@@ -25,11 +25,7 @@ import { redirect } from "next/navigation";
 import { remark } from "remark";
 import remarkHtml from "remark-html";
 
-export default async function DocumentPage({
-  params,
-}: {
-  params: { id: string };
-}) {
+export default async function DocumentPage(props: { params: { id: string } }) {
   const supabase = await createClient();
 
   const {
@@ -40,11 +36,13 @@ export default async function DocumentPage({
     return redirect("/login");
   }
 
+  const { id } = await props.params;
+
   // Fetch the document details based on the ID from params
   const { data: document, error } = await supabase
     .from("documents")
     .select("*")
-    .eq("id", params.id)
+    .eq("id", id)
     .single();
 
   if (error || !document) {
diff --git a/app/dashboard/upload/process/route.ts b/app/dashboard/upload/process/route.ts
index 76e1dd7..634cbfe 100644
--- a/app/dashboard/upload/process/route.ts
+++ b/app/dashboard/upload/process/route.ts
@@ -2,10 +2,22 @@ import { createClient } from "@/utils/supabase/server";
 import { NextResponse } from "next/server";
 import { Mistral } from "@mistralai/mistralai";
 import { redirect } from "next/navigation";
+import { ChatCompletionChoice } from "@mistralai/mistralai/models/components";
+import pLimit from "p-limit";
 
 const apiKey = process.env.MISTRAL_API_KEY;
 const client = new Mistral({ apiKey: apiKey });
 
+const PROCESSING_PROMPT = `
+You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
+
+The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content.
+Any images should be included.
+Do not return the Markdown as a code block, only as a raw string, without any new lines.
+
+The Markdown should be human-readable and well-formatted.
+`;
+
 export async function POST(request: Request) {
   const supabase = await createClient();
   const formData = await request.formData();
@@ -33,15 +45,126 @@ export async function POST(request: Request) {
     },
   });
 
+  const limit = pLimit(1); // Limit to 1 concurrent request (adjust as needed)
+
+  const promises: Promise<any>[] = [];
+
+  for (const page of ocrResponse.pages) {
+    const pagePromise = limit(async () => {
+      const response = await client.chat.complete({
+        model: "mistral-small-latest",
+        messages: [
+          {
+            role: "user",
+            content: [
+              {
+                type: "text",
+                text: PROCESSING_PROMPT,
+              },
+            ],
+          },
+        ],
+      });
+
+      if (!response.choices) {
+        console.error("No choices in response");
+        return;
+      }
+
+      const imageData: { [key: string]: string } = {};
+
+      if (page.images.length > 0) {
+        for (const img of page.images) {
+          imageData[img.id] = img.imageBase64!;
+        }
+      }
+
+      if (response.choices[0].message.content) {
+        const markdown = replaceImagesInMarkdown(
+          response.choices[0].message.content.toString(),
+          imageData
+        );
+
+        return {
+          ...page,
+          markdown,
+        };
+      } else {
+        console.error("Message content is undefined");
+      }
+    });
+
+    promises.push(pagePromise);
+  }
+
+  const results = await Promise.all(promises);
+  const sortedResults = results.sort((a, b) => a.index - b.index);
+
   const { data, error } = await supabase
     .from("documents")
     .update({
-      ocr_data: ocrResponse,
+      ocr_data: sortedResults,
     })
     .eq("id", id);
 
   if (error) {
     console.error(error);
     return NextResponse.json({ error: error.message }, { status: 500 });
   }
-  return redirect(`/dashboard/documents/${id}`); // Redirect to the document page after processing
+  return NextResponse.json({
+    id,
+  });
+}
+
+interface OCRResponse {
+  pages: {
+    markdown: string;
+    images: { id: string; image_base64: string }[];
+  }[];
+}
+
+function replaceImagesInMarkdown(
+  markdownStr: string,
+  imagesDict: { [key: string]: string }
+): string {
+  /**
+   * Replace image placeholders in markdown with base64-encoded images.
+   *
+   * Args:
+   *   markdownStr: Markdown text containing image placeholders
+   *   imagesDict: Dictionary mapping image IDs to base64 strings
+   *
+   * Returns:
+   *   Markdown text with images replaced by base64 data
+   */
+  for (const [imgName, base64Str] of Object.entries(imagesDict)) {
+    markdownStr = markdownStr.replace(
+      new RegExp(`!\\[${imgName}\\]\\(${imgName}\\)`, "g"),
+      `![${imgName}](${base64Str})`
+    );
+  }
+  return markdownStr;
+}
+
+function getCombinedMarkdown(ocrResponse: OCRResponse): string {
+  /**
+   * Combine OCR text and images into a single markdown document.
+   *
+   * Args:
+   *   ocrResponse: Response from OCR processing containing text and images
+   *
+   * Returns:
+   *   Combined markdown string with embedded images
+   */
+  const markdowns: string[] = [];
+  // Extract images from page
+  for (const page of ocrResponse.pages) {
+    const imageData: { [key: string]: string } = {};
+    for (const img of page.images) {
+      imageData[img.id] = img.image_base64;
+    }
+    // Replace image placeholders with actual images
+    markdowns.push(replaceImagesInMarkdown(page.markdown, imageData));
+  }
+
+  return markdowns.join("\n\n");
+}
diff --git a/app/layout.tsx b/app/layout.tsx
index c805869..430e63b 100644
--- a/app/layout.tsx
+++ b/app/layout.tsx
@@ -7,6 +7,7 @@ import { Geist } from "next/font/google";
 import { ThemeProvider } from "next-themes";
 import Link from "next/link";
 import "./globals.css";
+import { Toaster } from "@/components/ui/sonner";
 
 const defaultUrl = process.env.VERCEL_URL
   ? `https://${process.env.VERCEL_URL}`
@@ -38,6 +39,7 @@ export default function RootLayout({
           disableTransitionOnChange
         >
           {children}
+          <Toaster />
diff --git a/bun.lockb b/bun.lockb
index c9a33ab..da4b3e2 100755
Binary files a/bun.lockb and b/bun.lockb differ
diff --git a/components/UploadZone.tsx b/components/UploadZone.tsx
index 7897eef..2e3c8a2 100644
--- a/components/UploadZone.tsx
+++ b/components/UploadZone.tsx
@@ -1,49 +1,83 @@
 "use client";
 
 import { createClient } from "@/utils/supabase/client";
-import { CloudUpload } from "lucide-react";
+import { CloudUpload, LoaderCircle } from "lucide-react";
+import { useState } from "react";
+import { toast } from "sonner";
+import { SSE } from "sse.js";
 
 export default function UploadZone({ user }: { user?: { id: string } }) {
   const supabase = createClient();
+  const [uploading, setUploading] = useState(false);
+  const [status, setStatus] = useState("");
 
   const onUpload = async (file: File) => {
-    const uuid = crypto.randomUUID();
+    setUploading(true);
+    setStatus("Uploading...");
 
-    const { data: fileData, error: fileError } = await supabase.storage
-      .from("documents")
-      .upload(`${user!.id}/${uuid}.pdf`, file);
-
-    if (fileError) {
-      console.error(fileError);
-      return;
-    }
-
-    console.log("File uploaded successfully:", fileData);
-
-    const { data, error } = await supabase.from("documents").insert({
-      id: uuid,
-      file_name: file.name,
-      owner: user!.id,
-      raw_file: fileData.id,
-    });
+    const { data, error } = await supabase.auth.getSession();
 
     if (error) {
-      console.error(error);
+      toast.error("Failed to get user session.");
+      setUploading(false);
       return;
     }
 
-    console.log("Document inserted successfully:", data);
+    const body = new FormData();
+    body.append("file", file);
+    body.append("jwt", data.session?.access_token || "");
 
-    // process file at /dashboard/upload/process
-    const formData = new FormData();
-    formData.append("file", file);
-    formData.append("fileName", file.name);
-    formData.append("id", uuid);
-    const response = await fetch("/dashboard/upload/process", {
-      method: "POST",
-      body: formData,
+    const edgeFunctionUrl = `${process.env.NEXT_PUBLIC_SUPABASE_URL}/functions/v1/process-document`;
+
+    // Start listening to the SSE stream
+    const eventSource = new SSE(edgeFunctionUrl, {
+      payload: body,
+      headers: {
+        apikey: process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!,
+        Authorization: `Bearer ${process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY}`,
+        "Content-Type": "application/json",
+      },
     });
-    const result = await response.json();
-    console.log("File processed successfully:", result);
+
+    eventSource.onmessage = (event) => {
+      const data = JSON.parse(event.data);
+      console.log("SSE Message:", data);
+
+      if (data.message) {
+        setStatus(data.message);
+      }
+    };
+
+    eventSource.addEventListener("status", (event) => {
+      const data = JSON.parse(event.data);
+      console.log("Status Event:", data);
+
+      setStatus(data.message);
+    });
+
+    eventSource.addEventListener("error", (event) => {
+      console.error("SSE Error:", event);
+      toast.error("An error occurred while processing the document.");
+      setUploading(false);
+      eventSource.close();
+    });
+
+    eventSource.addEventListener("complete", (event) => {
+      const data = JSON.parse(event.data);
+      console.log("Processing Complete:", data);
+      toast.success("Document processing complete!");
+      setUploading(false);
+      eventSource.close();
+    });
+
+    // Invoke the serverless function
+    supabase.functions.invoke("process-document", {
+      body,
+      method: "POST",
+    });
+
+    toast.info(
+      "Document is being processed in the background. You will be notified when it's ready."
+    );
   };
 
   return (
@@ -53,13 +87,22 @@ export default function UploadZone({ user }: { user?: { id: string } }) {
       <label
         htmlFor="dropzone-file"
         className="flex flex-col items-center justify-center w-full h-64 border-2 border-muted border-dashed rounded-lg cursor-pointer bg-muted/50"
       >
-        <div className="flex flex-col items-center justify-center pt-5 pb-6">
-          <CloudUpload className="w-8 h-8 mb-4 text-muted-foreground" />
-          <p className="mb-2 text-sm text-muted-foreground">
-            <span className="font-semibold">Click to upload</span> or drag and
-            drop
-          </p>
-        </div>
+        {uploading ? (
+          <div className="flex flex-col items-center justify-center pt-5 pb-6">
+            <LoaderCircle className="w-8 h-8 mb-4 text-muted-foreground animate-spin" />
+            <p className="mb-2 text-sm text-muted-foreground">{status}</p>
+          </div>
+        ) : (
+          <>
+            <div className="flex flex-col items-center justify-center pt-5 pb-6">
+              <CloudUpload className="w-8 h-8 mb-4 text-muted-foreground" />
+              <p className="mb-2 text-sm text-muted-foreground">
+                <span className="font-semibold">Click to upload</span> or drag
+                and drop
+              </p>
+            </div>
+          </>
+        )}
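A note on the page loop added to app/dashboard/upload/process/route.ts: as committed, each client.chat.complete call sends only PROCESSING_PROMPT, so the model is asked to clean up a page without ever receiving that page's OCR markdown. The sketch below shows one way to pass the page text along; it reuses client, PROCESSING_PROMPT, and page from the loop above, and concatenating the page markdown onto the prompt is an assumption, not something the diff itself does.

// Sketch only: inside the `for (const page of ocrResponse.pages)` loop in route.ts.
const response = await client.chat.complete({
  model: "mistral-small-latest",
  messages: [
    {
      role: "user",
      content: [
        {
          type: "text",
          // Append the page's raw OCR markdown so the model has text to reformat.
          text: `${PROCESSING_PROMPT}\n\n${page.markdown}`,
        },
      ],
    },
  ],
});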
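The rewritten UploadZone no longer posts to /dashboard/upload/process; it streams progress from a process-document Supabase Edge Function that is not part of this diff (the new .vscode settings enable Deno tooling for supabase/functions). What the component depends on is the SSE contract: named "status" events while work is in progress, a "complete" event on success, and an "error" event on failure, each carrying a JSON message field. Below is a minimal sketch of that contract, assuming a Deno function at supabase/functions/process-document/index.ts; everything beyond the event names and the payload shape the client reads is illustrative.

// supabase/functions/process-document/index.ts (sketch; path and internals are assumed)
Deno.serve((_req) => {
  const encoder = new TextEncoder();

  const stream = new ReadableStream({
    async start(controller) {
      // One SSE frame: "event: <name>\ndata: <json>\n\n"
      const send = (event: string, data: unknown) =>
        controller.enqueue(
          encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`)
        );

      try {
        // The request body carries the "file" and "jwt" fields the client appends.
        send("status", { message: "Uploading file..." });
        // ...store the PDF, run OCR, post-process each page...
        send("status", { message: "Processing pages..." });
        send("complete", { message: "Document processing complete" });
      } catch (err) {
        send("error", { message: String(err) });
      } finally {
        controller.close();
      }
    },
  });

  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
    },
  });
});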
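Because route.ts now stores ocr_data as the sorted array of per-page results, with image placeholders already swapped for base64 data by replaceImagesInMarkdown, rendering in app/dashboard/documents/[id]/page.tsx mostly reduces to joining the page markdown and running it through remark, which that page already imports. A sketch of that step follows; the ocr_data shape mirrors what route.ts writes, while the sanitize option and the rest are assumptions about the rendering side rather than something shown in the diff.

// Sketch: after the document row is fetched in app/dashboard/documents/[id]/page.tsx.
const pages = (document.ocr_data ?? []) as { markdown: string }[];

// Images are already inlined as base64 data URIs, so joining the pages is enough.
const combinedMarkdown = pages.map((page) => page.markdown).join("\n\n");

// remark-html sanitizes output by default, which can strip data: image URIs;
// turning it off keeps the images, but should only be done for trusted content.
const html = String(
  await remark().use(remarkHtml, { sanitize: false }).process(combinedMarkdown)
);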