Add VSCode settings, update dependencies, and enhance document processing with image handling

Jack Merrill 2025-04-15 17:12:18 -04:00
parent 73a0fba45e
commit e84e5cc477
Signed by: jack
GPG Key ID: F6BFCA1B80EA6AF7
14 changed files with 1043 additions and 197 deletions

.vscode/extensions.json (vendored, new file, +3)

@@ -0,0 +1,3 @@
{
"recommendations": ["denoland.vscode-deno"]
}

.vscode/settings.json (vendored, new file, +22)

@@ -0,0 +1,22 @@
{
"deno.enablePaths": ["supabase/functions"],
"deno.lint": true,
"deno.unstable": [
"bare-node-builtins",
"byonm",
"sloppy-imports",
"unsafe-proto",
"webgpu",
"broadcast-channel",
"worker-options",
"cron",
"kv",
"ffi",
"fs",
"http",
"net"
],
"[typescript]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
}
}

app/dashboard/documents/[id]/page.tsx (path inferred)

@@ -25,11 +25,7 @@ import { redirect } from "next/navigation";
 import { remark } from "remark";
 import remarkHtml from "remark-html";
 
-export default async function DocumentPage({
-  params,
-}: {
-  params: { id: string };
-}) {
+export default async function DocumentPage(props: { params: { id: string } }) {
   const supabase = await createClient();
 
   const {
@@ -40,11 +36,13 @@ export default async function DocumentPage({
     return redirect("/login");
   }
 
+  const { id } = await props.params;
+
   // Fetch the document details based on the ID from params
   const { data: document, error } = await supabase
     .from("documents")
     .select("*")
-    .eq("id", params.id)
+    .eq("id", id)
     .single();
 
   if (error || !document) {
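Note: newer Next.js App Router releases (15+) deliver route params asynchronously, which is why the page now awaits props.params before reading the id. A minimal sketch of the pattern, with a hypothetical ExamplePage that is not part of this commit:

// Sketch (assumes Next.js 15+ async params; ExamplePage is hypothetical).
export default async function ExamplePage(props: {
  params: Promise<{ id: string }>;
}) {
  // params is a Promise here, so it must be awaited before use.
  const { id } = await props.params;
  return <h1>Document {id}</h1>;
}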

app/dashboard/upload/process/route.ts (path inferred)

@@ -2,10 +2,22 @@ import { createClient } from "@/utils/supabase/server";
 import { NextResponse } from "next/server";
 import { Mistral } from "@mistralai/mistralai";
 import { redirect } from "next/navigation";
+import { ChatCompletionChoice } from "@mistralai/mistralai/models/components";
+import pLimit from "p-limit";
 
 const apiKey = process.env.MISTRAL_API_KEY;
 const client = new Mistral({ apiKey: apiKey });
 
+const PROCESSING_PROMPT = `
+You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
+
+The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content.
+Any images should be included.
+Do not return the Markdown as a code block, only as a raw string, without any new lines.
+
+The Markdown should be human-readable and well-formatted.
+`;
+
 export async function POST(request: Request) {
   const supabase = await createClient();
   const formData = await request.formData();
@@ -33,15 +45,126 @@ export async function POST(request: Request) {
     },
   });
 
+  const limit = pLimit(1); // Limit to 1 concurrent request (adjust as needed)
+  const promises: Promise<any>[] = [];
+
+  for (const page of ocrResponse.pages) {
+    const pagePromise = limit(async () => {
+      const response = await client.chat.complete({
+        model: "mistral-small-latest",
+        messages: [
+          {
+            role: "user",
+            content: [
+              {
+                type: "text",
+                text: PROCESSING_PROMPT,
+              },
+            ],
+          },
+        ],
+      });
+
+      if (!response.choices) {
+        console.error("No choices in response");
+        return;
+      }
+
+      const imageData: { [key: string]: string } = {};
+      if (page.images.length > 0) {
+        for (const img of page.images) {
+          imageData[img.id] = img.imageBase64!;
+        }
+      }
+
+      if (response.choices[0].message.content) {
+        const markdown = replaceImagesInMarkdown(
+          response.choices[0].message.content.toString(),
+          imageData
+        );
+
+        return {
+          ...page,
+          markdown,
+        };
+      } else {
+        console.error("Message content is undefined");
+      }
+    });
+
+    promises.push(pagePromise);
+  }
+
+  const results = await Promise.all(promises);
+  const sortedResults = results.sort((a, b) => a.index - b.index);
+
   const { data, error } = await supabase
     .from("documents")
     .update({
-      ocr_data: ocrResponse,
+      ocr_data: sortedResults,
     })
     .eq("id", id);
 
   if (error) {
     console.error(error);
     return NextResponse.json({ error: error.message }, { status: 500 });
   }
 
-  return redirect(`/dashboard/documents/${id}`); // Redirect to the document page after processing
+  return NextResponse.json({
+    id,
+  });
 }
+
+interface OCRResponse {
+  pages: {
+    markdown: string;
+    images: { id: string; image_base64: string }[];
+  }[];
+}
+
+function replaceImagesInMarkdown(
+  markdownStr: string,
+  imagesDict: { [key: string]: string }
+): string {
+  /**
+   * Replace image placeholders in markdown with base64-encoded images.
+   *
+   * Args:
+   *   markdownStr: Markdown text containing image placeholders
+   *   imagesDict: Dictionary mapping image IDs to base64 strings
+   *
+   * Returns:
+   *   Markdown text with images replaced by base64 data
+   */
+  for (const [imgName, base64Str] of Object.entries(imagesDict)) {
+    markdownStr = markdownStr.replace(
+      new RegExp(`!\\[${imgName}\\]\\(${imgName}\\)`, "g"),
+      `![${imgName}](${base64Str})`
+    );
+  }
+  return markdownStr;
+}
+
+function getCombinedMarkdown(ocrResponse: OCRResponse): string {
+  /**
+   * Combine OCR text and images into a single markdown document.
+   *
+   * Args:
+   *   ocrResponse: Response from OCR processing containing text and images
+   *
+   * Returns:
+   *   Combined markdown string with embedded images
+   */
+  const markdowns: string[] = [];
+
+  // Extract images from page
+  for (const page of ocrResponse.pages) {
+    const imageData: { [key: string]: string } = {};
+    for (const img of page.images) {
+      imageData[img.id] = img.image_base64;
+    }
+
+    // Replace image placeholders with actual images
+    markdowns.push(replaceImagesInMarkdown(page.markdown, imageData));
+  }
+
+  return markdowns.join("\n\n");
+}
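Note on p-limit, introduced above to serialize the per-page chat calls: pLimit(n) returns a wrapper that lets at most n wrapped functions run at once. A minimal sketch with assumed inputs (not from this commit):

// Sketch: limiting concurrency with p-limit (inputs are hypothetical).
import pLimit from "p-limit";

const limit = pLimit(1); // at most one task in flight, as in the route above

const pages = ["page-1", "page-2", "page-3"];
const results = await Promise.all(
  pages.map((name) =>
    limit(async () => {
      // Each task starts only after the previous one settles.
      return `processed ${name}`;
    })
  )
);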

app/layout.tsx (path inferred)

@@ -7,6 +7,7 @@ import { Geist } from "next/font/google";
 import { ThemeProvider } from "next-themes";
 import Link from "next/link";
 import "./globals.css";
+import { Toaster } from "@/components/ui/sonner";
 
 const defaultUrl = process.env.VERCEL_URL
   ? `https://${process.env.VERCEL_URL}`
@@ -38,6 +39,7 @@ export default function RootLayout({
           disableTransitionOnChange
         >
           {children}
+          <Toaster />
           <ThemeSwitcher />
         </ThemeProvider>
       </body>

bun.lockb (binary file not shown)

UploadZone component (file path not shown in source)

@@ -1,49 +1,83 @@
 "use client";
 
 import { createClient } from "@/utils/supabase/client";
-import { CloudUpload } from "lucide-react";
+import { CloudUpload, LoaderCircle } from "lucide-react";
+import { useState } from "react";
+import { toast } from "sonner";
+import { SSE } from "sse.js";
 
 export default function UploadZone({ user }: { user?: { id: string } }) {
   const supabase = createClient();
+  const [uploading, setUploading] = useState(false);
+  const [status, setStatus] = useState("");
 
   const onUpload = async (file: File) => {
-    const uuid = crypto.randomUUID();
+    setUploading(true);
+    setStatus("Uploading...");
 
-    const { data: fileData, error: fileError } = await supabase.storage
-      .from("documents")
-      .upload(`${user!.id}/${uuid}.pdf`, file);
-
-    if (fileError) {
-      console.error(fileError);
-      return;
-    }
-
-    console.log("File uploaded successfully:", fileData);
-
-    const { data, error } = await supabase.from("documents").insert({
-      id: uuid,
-      file_name: file.name,
-      owner: user!.id,
-      raw_file: fileData.id,
-    });
+    const { data, error } = await supabase.auth.getSession();
 
     if (error) {
-      console.error(error);
+      toast.error("Failed to get user session.");
+      setUploading(false);
       return;
     }
 
-    console.log("Document inserted successfully:", data);
+    const body = new FormData();
+    body.append("file", file);
+    body.append("jwt", data.session?.access_token || "");
 
-    // process file at /dashboard/upload/process
-    const formData = new FormData();
-    formData.append("file", file);
-    formData.append("fileName", file.name);
-    formData.append("id", uuid);
-
-    const response = await fetch("/dashboard/upload/process", {
-      method: "POST",
-      body: formData,
-    });
-
-    const result = await response.json();
-    console.log("File processed successfully:", result);
+    const edgeFunctionUrl = `${process.env.NEXT_PUBLIC_SUPABASE_URL}/functions/v1/process-document`;
+
+    // Start listening to the SSE stream
+    const eventSource = new SSE(edgeFunctionUrl, {
+      payload: body,
+      headers: {
+        apikey: process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!,
+        Authorization: `Bearer ${process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY}`,
+        "Content-Type": "application/json",
+      },
+    });
+
+    eventSource.onmessage = (event) => {
+      const data = JSON.parse(event.data);
+      console.log("SSE Message:", data);
+      if (data.message) {
+        setStatus(data.message);
+      }
+    };
+
+    eventSource.addEventListener("status", (event) => {
+      const data = JSON.parse(event.data);
+      console.log("Status Event:", data);
+      setStatus(data.message);
+    });
+
+    eventSource.addEventListener("error", (event) => {
+      console.error("SSE Error:", event);
+      toast.error("An error occurred while processing the document.");
+      setUploading(false);
+      eventSource.close();
+    });
+
+    eventSource.addEventListener("complete", (event) => {
+      const data = JSON.parse(event.data);
+      console.log("Processing Complete:", data);
+      toast.success("Document processing complete!");
+      setUploading(false);
+      eventSource.close();
+    });
+
+    // Invoke the serverless function
+    supabase.functions.invoke("process-document", {
+      body,
+      method: "POST",
+    });
+
+    toast.info(
+      "Document is being processed in the background. You will be notified when it's ready."
+    );
   };
@@ -53,13 +87,22 @@ export default function UploadZone({ user }: { user?: { id: string } }) {
           htmlFor="dropzone-file"
           className="flex flex-col items-center justify-center w-full h-64 border-2 border-muted border-dashed rounded-lg cursor-pointer bg-muted/50"
         >
-          <div className="flex flex-col items-center justify-center pt-5 pb-5">
-            <CloudUpload className="w-10 h-10 mb-4 text-slate-400" />
-            <p className="mb-2 text-sm text-slate-400">
-              <span className="font-semibold">Click to upload</span> or drag and
-              drop
-            </p>
-          </div>
+          {uploading ? (
+            <div className="flex flex-col items-center justify-center pt-5 pb-5">
+              <LoaderCircle className="w-10 h-10 mb-4 text-slate-400 animate-spin" />
+              <p className="mb-2 text-sm text-slate-400">{status}</p>
+            </div>
+          ) : (
+            <>
+              <div className="flex flex-col items-center justify-center pt-5 pb-5">
+                <CloudUpload className="w-10 h-10 mb-4 text-slate-400" />
+                <p className="mb-2 text-sm text-slate-400">
+                  <span className="font-semibold">Click to upload</span> or drag
+                  and drop
+                </p>
+              </div>
+            </>
+          )}
           <input
             id="dropzone-file"
             type="file"

components/nav-actions.tsx (path inferred)

@@ -2,6 +2,7 @@
 
 import * as React from "react";
 import {
+  ALargeSmall,
   ArrowDown,
   ArrowUp,
   Bell,
@@ -112,6 +113,14 @@ export function NavActions({ pages }: { pages: any[] }) {
         {/* <div className="text-muted-foreground hidden font-medium md:inline-block">
           Edit Oct 08
         </div> */}
+        <Popover>
+          <PopoverTrigger asChild>
+            <Button variant="ghost" size="icon" className="h-7 w-7">
+              <ALargeSmall />
+            </Button>
+          </PopoverTrigger>
+          <PopoverContent></PopoverContent>
+        </Popover>
         <Popover>
           <PopoverTrigger asChild>
             <Button variant="ghost" size="icon" className="h-7 w-7">

components/ui/sonner.tsx (new file, +25)

@@ -0,0 +1,25 @@
"use client"
import { useTheme } from "next-themes"
import { Toaster as Sonner, ToasterProps } from "sonner"
const Toaster = ({ ...props }: ToasterProps) => {
const { theme = "system" } = useTheme()
return (
<Sonner
theme={theme as ToasterProps["theme"]}
className="toaster group"
style={
{
"--normal-bg": "var(--popover)",
"--normal-text": "var(--popover-foreground)",
"--normal-border": "var(--border)",
} as React.CSSProperties
}
{...props}
/>
)
}
export { Toaster }
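
With this Toaster mounted once in the root layout (see app/layout.tsx above), any client component can trigger notifications through sonner's toast API. A minimal sketch (SaveButton is hypothetical, not part of this commit):

// Sketch: firing a toast from a client component.
"use client";

import { toast } from "sonner";

export function SaveButton() {
  return <button onClick={() => toast.success("Saved!")}>Save</button>;
}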

package.json

@@ -30,12 +30,15 @@
     "kokoro-js": "^1.2.0",
     "lucide-react": "^0.486.0",
     "next": "latest",
-    "next-themes": "^0.4.3",
+    "next-themes": "^0.4.6",
+    "p-limit": "^6.2.0",
     "prettier": "^3.3.3",
     "react": "19.0.0",
     "react-dom": "19.0.0",
     "remark": "^15.0.1",
     "remark-html": "^16.0.1",
+    "sonner": "^2.0.3",
+    "sse.js": "^2.6.0",
     "tw-animate-css": "^1.2.5",
     "zod": "^3.24.2"
   },

supabase/.gitignore (vendored, new file, +8)

@@ -0,0 +1,8 @@
# Supabase
.branches
.temp
# dotenvx
.env.keys
.env.local
.env.*.local

supabase/config.toml (new file, +308)

@@ -0,0 +1,308 @@
# For detailed configuration reference documentation, visit:
# https://supabase.com/docs/guides/local-development/cli/config
# A string used to distinguish different Supabase projects on the same host. Defaults to the
# working directory name when running `supabase init`.
project_id = "neuro-read"
[api]
enabled = true
# Port to use for the API URL.
port = 54321
# Schemas to expose in your API. Tables, views and stored procedures in this schema will get API
# endpoints. `public` and `graphql_public` schemas are included by default.
schemas = ["public", "graphql_public"]
# Extra schemas to add to the search_path of every request.
extra_search_path = ["public", "extensions"]
# The maximum number of rows returns from a view, table, or stored procedure. Limits payload size
# for accidental or malicious requests.
max_rows = 1000
[api.tls]
# Enable HTTPS endpoints locally using a self-signed certificate.
enabled = false
[db]
# Port to use for the local database URL.
port = 54322
# Port used by db diff command to initialize the shadow database.
shadow_port = 54320
# The database major version to use. This has to be the same as your remote database's. Run `SHOW
# server_version;` on the remote database to check.
major_version = 15
[db.pooler]
enabled = false
# Port to use for the local connection pooler.
port = 54329
# Specifies when a server connection can be reused by other clients.
# Configure one of the supported pooler modes: `transaction`, `session`.
pool_mode = "transaction"
# How many server connections to allow per user/database pair.
default_pool_size = 20
# Maximum number of client connections allowed.
max_client_conn = 100
# [db.vault]
# secret_key = "env(SECRET_VALUE)"
[db.migrations]
# Specifies an ordered list of schema files that describe your database.
# Supports glob patterns relative to supabase directory: "./schemas/*.sql"
schema_paths = []
[db.seed]
# If enabled, seeds the database after migrations during a db reset.
enabled = true
# Specifies an ordered list of seed files to load during db reset.
# Supports glob patterns relative to supabase directory: "./seeds/*.sql"
sql_paths = ["./seed.sql"]
[realtime]
enabled = true
# Bind realtime via either IPv4 or IPv6. (default: IPv4)
# ip_version = "IPv6"
# The maximum length in bytes of HTTP request headers. (default: 4096)
# max_header_length = 4096
[studio]
enabled = true
# Port to use for Supabase Studio.
port = 54323
# External URL of the API server that frontend connects to.
api_url = "http://127.0.0.1"
# OpenAI API Key to use for Supabase AI in the Supabase Studio.
openai_api_key = "env(OPENAI_API_KEY)"
# Email testing server. Emails sent with the local dev setup are not actually sent - rather, they
# are monitored, and you can view the emails that would have been sent from the web interface.
[inbucket]
enabled = true
# Port to use for the email testing server web interface.
port = 54324
# Uncomment to expose additional ports for testing user applications that send emails.
# smtp_port = 54325
# pop3_port = 54326
# admin_email = "admin@email.com"
# sender_name = "Admin"
[storage]
enabled = true
# The maximum file size allowed (e.g. "5MB", "500KB").
file_size_limit = "50MiB"
# Image transformation API is available to Supabase Pro plan.
# [storage.image_transformation]
# enabled = true
# Uncomment to configure local storage buckets
# [storage.buckets.images]
# public = false
# file_size_limit = "50MiB"
# allowed_mime_types = ["image/png", "image/jpeg"]
# objects_path = "./images"
[auth]
enabled = true
# The base URL of your website. Used as an allow-list for redirects and for constructing URLs used
# in emails.
site_url = "http://127.0.0.1:3000"
# A list of *exact* URLs that auth providers are permitted to redirect to post authentication.
additional_redirect_urls = ["https://127.0.0.1:3000"]
# How long tokens are valid for, in seconds. Defaults to 3600 (1 hour), maximum 604,800 (1 week).
jwt_expiry = 3600
# If disabled, the refresh token will never expire.
enable_refresh_token_rotation = true
# Allows refresh tokens to be reused after expiry, up to the specified interval in seconds.
# Requires enable_refresh_token_rotation = true.
refresh_token_reuse_interval = 10
# Allow/disallow new user signups to your project.
enable_signup = true
# Allow/disallow anonymous sign-ins to your project.
enable_anonymous_sign_ins = false
# Allow/disallow testing manual linking of accounts
enable_manual_linking = false
# Passwords shorter than this value will be rejected as weak. Minimum 6, recommended 8 or more.
minimum_password_length = 6
# Passwords that do not meet the following requirements will be rejected as weak. Supported values
# are: `letters_digits`, `lower_upper_letters_digits`, `lower_upper_letters_digits_symbols`
password_requirements = ""
[auth.rate_limit]
# Number of emails that can be sent per hour. Requires auth.email.smtp to be enabled.
email_sent = 2
# Number of SMS messages that can be sent per hour. Requires auth.sms to be enabled.
sms_sent = 30
# Number of anonymous sign-ins that can be made per hour per IP address. Requires enable_anonymous_sign_ins = true.
anonymous_users = 30
# Number of sessions that can be refreshed in a 5 minute interval per IP address.
token_refresh = 150
# Number of sign up and sign-in requests that can be made in a 5 minute interval per IP address (excludes anonymous users).
sign_in_sign_ups = 30
# Number of OTP / Magic link verifications that can be made in a 5 minute interval per IP address.
token_verifications = 30
# Configure one of the supported captcha providers: `hcaptcha`, `turnstile`.
# [auth.captcha]
# enabled = true
# provider = "hcaptcha"
# secret = ""
[auth.email]
# Allow/disallow new user signups via email to your project.
enable_signup = true
# If enabled, a user will be required to confirm any email change on both the old, and new email
# addresses. If disabled, only the new email is required to confirm.
double_confirm_changes = true
# If enabled, users need to confirm their email address before signing in.
enable_confirmations = false
# If enabled, users will need to reauthenticate or have logged in recently to change their password.
secure_password_change = false
# Controls the minimum amount of time that must pass before sending another signup confirmation or password reset email.
max_frequency = "1s"
# Number of characters used in the email OTP.
otp_length = 6
# Number of seconds before the email OTP expires (defaults to 1 hour).
otp_expiry = 3600
# Use a production-ready SMTP server
# [auth.email.smtp]
# enabled = true
# host = "smtp.sendgrid.net"
# port = 587
# user = "apikey"
# pass = "env(SENDGRID_API_KEY)"
# admin_email = "admin@email.com"
# sender_name = "Admin"
# Uncomment to customize email template
# [auth.email.template.invite]
# subject = "You have been invited"
# content_path = "./supabase/templates/invite.html"
[auth.sms]
# Allow/disallow new user signups via SMS to your project.
enable_signup = false
# If enabled, users need to confirm their phone number before signing in.
enable_confirmations = false
# Template for sending OTP to users
template = "Your code is {{ .Code }}"
# Controls the minimum amount of time that must pass before sending another sms otp.
max_frequency = "5s"
# Use pre-defined map of phone number to OTP for testing.
# [auth.sms.test_otp]
# 4152127777 = "123456"
# Configure logged in session timeouts.
# [auth.sessions]
# Force log out after the specified duration.
# timebox = "24h"
# Force log out if the user has been inactive longer than the specified duration.
# inactivity_timeout = "8h"
# This hook runs before a token is issued and allows you to add additional claims based on the authentication method used.
# [auth.hook.custom_access_token]
# enabled = true
# uri = "pg-functions://<database>/<schema>/<hook_name>"
# Configure one of the supported SMS providers: `twilio`, `twilio_verify`, `messagebird`, `textlocal`, `vonage`.
[auth.sms.twilio]
enabled = false
account_sid = ""
message_service_sid = ""
# DO NOT commit your Twilio auth token to git. Use environment variable substitution instead:
auth_token = "env(SUPABASE_AUTH_SMS_TWILIO_AUTH_TOKEN)"
# Multi-factor-authentication is available to Supabase Pro plan.
[auth.mfa]
# Control how many MFA factors can be enrolled at once per user.
max_enrolled_factors = 10
# Control MFA via App Authenticator (TOTP)
[auth.mfa.totp]
enroll_enabled = false
verify_enabled = false
# Configure MFA via Phone Messaging
[auth.mfa.phone]
enroll_enabled = false
verify_enabled = false
otp_length = 6
template = "Your code is {{ .Code }}"
max_frequency = "5s"
# Configure MFA via WebAuthn
# [auth.mfa.web_authn]
# enroll_enabled = true
# verify_enabled = true
# Use an external OAuth provider. The full list of providers are: `apple`, `azure`, `bitbucket`,
# `discord`, `facebook`, `github`, `gitlab`, `google`, `keycloak`, `linkedin_oidc`, `notion`, `twitch`,
# `twitter`, `slack`, `spotify`, `workos`, `zoom`.
[auth.external.apple]
enabled = false
client_id = ""
# DO NOT commit your OAuth provider secret to git. Use environment variable substitution instead:
secret = "env(SUPABASE_AUTH_EXTERNAL_APPLE_SECRET)"
# Overrides the default auth redirectUrl.
redirect_uri = ""
# Overrides the default auth provider URL. Used to support self-hosted gitlab, single-tenant Azure,
# or any other third-party OIDC providers.
url = ""
# If enabled, the nonce check will be skipped. Required for local sign in with Google auth.
skip_nonce_check = false
# Use Firebase Auth as a third-party provider alongside Supabase Auth.
[auth.third_party.firebase]
enabled = false
# project_id = "my-firebase-project"
# Use Auth0 as a third-party provider alongside Supabase Auth.
[auth.third_party.auth0]
enabled = false
# tenant = "my-auth0-tenant"
# tenant_region = "us"
# Use AWS Cognito (Amplify) as a third-party provider alongside Supabase Auth.
[auth.third_party.aws_cognito]
enabled = false
# user_pool_id = "my-user-pool-id"
# user_pool_region = "us-east-1"
# Use Clerk as a third-party provider alongside Supabase Auth.
[auth.third_party.clerk]
enabled = false
# Obtain from https://clerk.com/setup/supabase
# domain = "example.clerk.accounts.dev"
[edge_runtime]
enabled = true
# Configure one of the supported request policies: `oneshot`, `per_worker`.
# Use `oneshot` for hot reload, or `per_worker` for load testing.
policy = "oneshot"
# Port to attach the Chrome inspector for debugging edge functions.
inspector_port = 8083
# The Deno major version to use.
deno_version = 1
# [edge_runtime.secrets]
# secret_key = "env(SECRET_VALUE)"
[analytics]
enabled = true
port = 54327
# Configure one of the supported backends: `postgres`, `bigquery`.
backend = "postgres"
# Experimental features may be deprecated any time
[experimental]
# Configures Postgres storage engine to use OrioleDB (S3)
orioledb_version = ""
# Configures S3 bucket URL, eg. <bucket_name>.s3-<region>.amazonaws.com
s3_host = "env(S3_HOST)"
# Configures S3 bucket region, eg. us-east-1
s3_region = "env(S3_REGION)"
# Configures AWS_ACCESS_KEY_ID for S3 bucket
s3_access_key = "env(S3_ACCESS_KEY)"
# Configures AWS_SECRET_ACCESS_KEY for S3 bucket
s3_secret_key = "env(S3_SECRET_KEY)"

supabase/functions/process-document/index.ts (path inferred)

@@ -0,0 +1,285 @@
import "jsr:@supabase/functions-js/edge-runtime.d.ts";
import { createClient } from "jsr:@supabase/supabase-js@2";
import { Mistral } from "npm:@mistralai/mistralai";
import pLimit from "npm:p-limit";
export const corsHeaders = {
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Headers":
"authorization, x-client-info, apikey, content-type",
};
const apiKey = Deno.env.get("MISTRAL_API_KEY");
const client = new Mistral({
apiKey: apiKey,
});
const PROCESSING_PROMPT = `
You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content.
Any images should be included.
Do not return the Markdown as a code block, only as a raw string, without any new lines.
The Markdown should be human-readable and well-formatted.
`;
Deno.serve(async (req) => {
console.log("Request received:", req.method);
if (req.method === "OPTIONS") {
return new Response("ok", { headers: corsHeaders });
}
if (req.method === "POST") {
console.log("Processing POST request...");
const { body, writable } = new TransformStream();
const writer = writable.getWriter();
// Set up the SSE response
const headers = new Headers({
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
Connection: "keep-alive",
...corsHeaders,
});
const sendEvent = async (event: string, data: any) => {
const message = `event: ${event}\ndata: ${JSON.stringify(data)}\n\n`;
console.log("Sending event:", message);
await writer.write(new TextEncoder().encode(message));
};
// Start streaming updates
sendEvent("status", { message: "Initializing..." });
try {
const supabase = createClient(
Deno.env.get("SUPABASE_URL"),
Deno.env.get("SUPABASE_ANON_KEY")
);
const formData = await req.formData();
const file = formData.get("file");
const jwt = formData.get("jwt");
const fileName = file.name;
const uuid = crypto.randomUUID();
console.log("Generated UUID:", uuid);
sendEvent("status", { message: "Generated UUID", uuid });
const user = await supabase.auth.getUser(jwt);
console.log("Authenticated user:", user);
sendEvent("status", { message: "Authenticated user", user });
const { data: storageData, error: storageError } = await supabase.storage
.from("documents")
.upload(`${user!.id}/${uuid}.pdf`, file);
if (storageError) {
console.error("Error uploading file to storage:", storageError);
sendEvent("error", {
message: "Error uploading file to storage",
error: storageError,
});
throw new Error("File upload failed");
} else {
console.log("File uploaded to storage:", storageData);
sendEvent("status", {
message: "File uploaded to storage",
storageData,
});
}
const { error: docError } = await supabase.from("documents").insert({
id: uuid,
file_name: file.name,
owner: user!.id,
raw_file: storageData.id,
is_processing: true,
});
if (docError) {
console.error("Error inserting document record:", docError);
sendEvent("error", {
message: "Error inserting document record",
error: docError,
});
throw new Error("Document record insertion failed");
} else {
console.log("Document record inserted successfully.");
sendEvent("status", {
message: "Document record inserted successfully",
});
}
console.log("Uploading file to Mistral...");
sendEvent("status", { message: "Uploading file to Mistral..." });
const uploaded_pdf = await client.files.upload({
file: {
fileName,
content: file,
},
purpose: "ocr",
});
console.log("File uploaded to Mistral:", uploaded_pdf);
sendEvent("status", {
message: "File uploaded to Mistral",
uploaded_pdf,
});
const signedUrl = await client.files.getSignedUrl({
fileId: uploaded_pdf.id,
});
console.log("Generated signed URL:", signedUrl);
sendEvent("status", { message: "Generated signed URL", signedUrl });
console.log("Processing OCR...");
sendEvent("status", { message: "Processing OCR..." });
const ocrResponse = await client.ocr.process({
model: "mistral-ocr-latest",
document: {
type: "document_url",
documentUrl: signedUrl.url,
},
});
console.log("OCR response received:", ocrResponse);
sendEvent("status", { message: "OCR response received", ocrResponse });
const limit = pLimit(1);
const promises = [];
for (const page of ocrResponse.pages) {
console.log("Processing page:", page.index);
sendEvent("status", { message: `Processing page ${page.index}` });
const pagePromise = limit(async () => {
console.log(`Processing page ${page.index} with Mistral...`);
const response = await client.chat.complete({
model: "mistral-small-latest",
messages: [
{
role: "user",
content: [
{
type: "text",
text: PROCESSING_PROMPT,
},
],
},
],
});
if (!response.choices) {
console.error("No choices in response for page:", page.index);
sendEvent("error", {
message: `No choices in response for page ${page.index}`,
});
return;
}
console.log("Response received for page:", page.index);
sendEvent("status", {
message: `Response received for page ${page.index}`,
});
const imageData = {};
if (page.images.length > 0) {
console.log(
`Processing ${page.images.length} images for page ${page.index}...`
);
sendEvent("status", {
message: `Processing images for page ${page.index}`,
});
for (const img of page.images) {
imageData[img.id] = img.imageBase64;
}
}
if (response.choices[0].message.content) {
console.log("Generating Markdown for page:", page.index);
sendEvent("status", {
message: `Generating Markdown for page ${page.index}`,
});
const markdown = replaceImagesInMarkdown(
response.choices[0].message.content.toString(),
imageData
);
return {
...page,
markdown,
};
} else {
console.error("Message content is undefined for page:", page.index);
sendEvent("error", {
message: `Message content is undefined for page ${page.index}`,
});
}
});
promises.push(pagePromise);
}
console.log("Waiting for all pages to be processed...");
sendEvent("status", {
message: "Waiting for all pages to be processed...",
});
const results = await Promise.all(promises);
console.log("All pages processed. Results:", results);
sendEvent("status", { message: "All pages processed", results });
const sortedResults = results.sort((a, b) => a.index - b.index);
console.log("Sorted results:", sortedResults);
sendEvent("status", { message: "Sorted results", sortedResults });
const { data, error } = await supabase
.from("documents")
.update({
ocr_data: sortedResults,
is_processing: false,
})
.eq("id", uuid);
if (error) {
console.error("Error updating document record:", error);
sendEvent("error", {
message: "Error updating document record",
error,
});
throw new Error("Document record update failed");
}
console.log("Document record updated successfully.");
sendEvent("status", { message: "Document record updated successfully" });
sendEvent("status", { completed: true, uuid });
} catch (error) {
console.error("Error during processing:", error);
sendEvent("error", { message: "Error during processing", error });
} finally {
console.log("Closing SSE stream...");
await writer.close();
}
return new Response(body, { headers });
}
console.error("Method not allowed:", req.method);
return new Response("Method not allowed", {
status: 405,
});
});
function replaceImagesInMarkdown(markdownStr, imagesDict) {
console.log("Replacing images in Markdown...");
for (const [imgName, base64Str] of Object.entries(imagesDict)) {
markdownStr = markdownStr.replace(
new RegExp(`!\\[${imgName}\\]\\(${imgName}\\)`, "g"),
`![${imgName}](${base64Str})`
);
}
console.log("Image replacement complete.");
return markdownStr;
}
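
The function streams progress by writing SSE frames into one side of a TransformStream while returning the readable side as the response body. A distilled sketch of that pattern (the sseResponse helper is hypothetical, not part of this commit):

// Sketch: generic SSE response helper built on web-standard streams.
function sseResponse(
  run: (send: (event: string, data: unknown) => Promise<void>) => Promise<void>
): Response {
  const { readable, writable } = new TransformStream();
  const writer = writable.getWriter();
  const encoder = new TextEncoder();

  // Each frame is "event: <name>\ndata: <json>\n\n", as in sendEvent above.
  const send = (event: string, data: unknown) =>
    writer.write(
      encoder.encode(`event: ${event}\ndata: ${JSON.stringify(data)}\n\n`)
    );

  // Work continues after the Response is returned; close the stream when done.
  run(send)
    .catch(console.error)
    .finally(() => writer.close());

  return new Response(readable, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
    },
  });
}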

Generated Supabase database types (file path not shown)

@@ -1,150 +1,167 @@
 export type Json =
   | string
   | number
   | boolean
   | null
   | { [key: string]: Json | undefined }
   | Json[]
 
 export type Database = {
   public: {
     Tables: {
       documents: {
         Row: {
           created_at: string
           file_name: string
           id: string
+          is_processing: boolean
           ocr_data: Json | null
           owner: string
           raw_file: string
         }
         Insert: {
           created_at?: string
           file_name: string
           id?: string
+          is_processing?: boolean
           ocr_data?: Json | null
           owner: string
           raw_file: string
         }
         Update: {
           created_at?: string
           file_name?: string
           id?: string
+          is_processing?: boolean
           ocr_data?: Json | null
           owner?: string
           raw_file?: string
         }
         Relationships: []
       }
     }
     Views: {
       [_ in never]: never
     }
     Functions: {
       [_ in never]: never
     }
     Enums: {
       [_ in never]: never
     }
     CompositeTypes: {
       [_ in never]: never
     }
   }
 }
 
-type PublicSchema = Database[Extract<keyof Database, "public">]
-
-export type Tables<
-  PublicTableNameOrOptions extends
-    | keyof (PublicSchema["Tables"] & PublicSchema["Views"])
-    | { schema: keyof Database },
-  TableName extends PublicTableNameOrOptions extends { schema: keyof Database }
-    ? keyof (Database[PublicTableNameOrOptions["schema"]]["Tables"] &
-        Database[PublicTableNameOrOptions["schema"]]["Views"])
-    : never = never,
-> = PublicTableNameOrOptions extends { schema: keyof Database }
-  ? (Database[PublicTableNameOrOptions["schema"]]["Tables"] &
-      Database[PublicTableNameOrOptions["schema"]]["Views"])[TableName] extends {
-      Row: infer R
-    }
-    ? R
-    : never
-  : PublicTableNameOrOptions extends keyof (PublicSchema["Tables"] &
-        PublicSchema["Views"])
-    ? (PublicSchema["Tables"] &
-        PublicSchema["Views"])[PublicTableNameOrOptions] extends {
-        Row: infer R
-      }
-      ? R
-      : never
-    : never
-
-export type TablesInsert<
-  PublicTableNameOrOptions extends
-    | keyof PublicSchema["Tables"]
-    | { schema: keyof Database },
-  TableName extends PublicTableNameOrOptions extends { schema: keyof Database }
-    ? keyof Database[PublicTableNameOrOptions["schema"]]["Tables"]
-    : never = never,
-> = PublicTableNameOrOptions extends { schema: keyof Database }
-  ? Database[PublicTableNameOrOptions["schema"]]["Tables"][TableName] extends {
-      Insert: infer I
-    }
-    ? I
-    : never
-  : PublicTableNameOrOptions extends keyof PublicSchema["Tables"]
-    ? PublicSchema["Tables"][PublicTableNameOrOptions] extends {
-        Insert: infer I
-      }
-      ? I
-      : never
-    : never
-
-export type TablesUpdate<
-  PublicTableNameOrOptions extends
-    | keyof PublicSchema["Tables"]
-    | { schema: keyof Database },
-  TableName extends PublicTableNameOrOptions extends { schema: keyof Database }
-    ? keyof Database[PublicTableNameOrOptions["schema"]]["Tables"]
-    : never = never,
-> = PublicTableNameOrOptions extends { schema: keyof Database }
-  ? Database[PublicTableNameOrOptions["schema"]]["Tables"][TableName] extends {
-      Update: infer U
-    }
-    ? U
-    : never
-  : PublicTableNameOrOptions extends keyof PublicSchema["Tables"]
-    ? PublicSchema["Tables"][PublicTableNameOrOptions] extends {
-        Update: infer U
-      }
-      ? U
-      : never
-    : never
-
-export type Enums<
-  PublicEnumNameOrOptions extends
-    | keyof PublicSchema["Enums"]
-    | { schema: keyof Database },
-  EnumName extends PublicEnumNameOrOptions extends { schema: keyof Database }
-    ? keyof Database[PublicEnumNameOrOptions["schema"]]["Enums"]
-    : never = never,
-> = PublicEnumNameOrOptions extends { schema: keyof Database }
-  ? Database[PublicEnumNameOrOptions["schema"]]["Enums"][EnumName]
-  : PublicEnumNameOrOptions extends keyof PublicSchema["Enums"]
-    ? PublicSchema["Enums"][PublicEnumNameOrOptions]
-    : never
-
-export type CompositeTypes<
-  PublicCompositeTypeNameOrOptions extends
-    | keyof PublicSchema["CompositeTypes"]
-    | { schema: keyof Database },
-  CompositeTypeName extends PublicCompositeTypeNameOrOptions extends {
-    schema: keyof Database
-  }
-    ? keyof Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"]
-    : never = never,
-> = PublicCompositeTypeNameOrOptions extends { schema: keyof Database }
-  ? Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"][CompositeTypeName]
-  : PublicCompositeTypeNameOrOptions extends keyof PublicSchema["CompositeTypes"]
-    ? PublicSchema["CompositeTypes"][PublicCompositeTypeNameOrOptions]
-    : never
+type DefaultSchema = Database[Extract<keyof Database, "public">]
+
+export type Tables<
+  DefaultSchemaTableNameOrOptions extends
+    | keyof (DefaultSchema["Tables"] & DefaultSchema["Views"])
+    | { schema: keyof Database },
+  TableName extends DefaultSchemaTableNameOrOptions extends {
+    schema: keyof Database
+  }
+    ? keyof (Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] &
+        Database[DefaultSchemaTableNameOrOptions["schema"]]["Views"])
+    : never = never,
+> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database }
+  ? (Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"] &
+      Database[DefaultSchemaTableNameOrOptions["schema"]]["Views"])[TableName] extends {
+      Row: infer R
+    }
+    ? R
+    : never
+  : DefaultSchemaTableNameOrOptions extends keyof (DefaultSchema["Tables"] &
+        DefaultSchema["Views"])
+    ? (DefaultSchema["Tables"] &
+        DefaultSchema["Views"])[DefaultSchemaTableNameOrOptions] extends {
+        Row: infer R
+      }
+      ? R
+      : never
+    : never
+
+export type TablesInsert<
+  DefaultSchemaTableNameOrOptions extends
+    | keyof DefaultSchema["Tables"]
+    | { schema: keyof Database },
+  TableName extends DefaultSchemaTableNameOrOptions extends {
+    schema: keyof Database
+  }
+    ? keyof Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"]
+    : never = never,
+> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database }
+  ? Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"][TableName] extends {
+      Insert: infer I
+    }
+    ? I
+    : never
+  : DefaultSchemaTableNameOrOptions extends keyof DefaultSchema["Tables"]
+    ? DefaultSchema["Tables"][DefaultSchemaTableNameOrOptions] extends {
+        Insert: infer I
+      }
+      ? I
+      : never
+    : never
+
+export type TablesUpdate<
+  DefaultSchemaTableNameOrOptions extends
+    | keyof DefaultSchema["Tables"]
+    | { schema: keyof Database },
+  TableName extends DefaultSchemaTableNameOrOptions extends {
+    schema: keyof Database
+  }
+    ? keyof Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"]
+    : never = never,
+> = DefaultSchemaTableNameOrOptions extends { schema: keyof Database }
+  ? Database[DefaultSchemaTableNameOrOptions["schema"]]["Tables"][TableName] extends {
+      Update: infer U
+    }
+    ? U
+    : never
+  : DefaultSchemaTableNameOrOptions extends keyof DefaultSchema["Tables"]
+    ? DefaultSchema["Tables"][DefaultSchemaTableNameOrOptions] extends {
+        Update: infer U
+      }
+      ? U
+      : never
+    : never
+
+export type Enums<
+  DefaultSchemaEnumNameOrOptions extends
+    | keyof DefaultSchema["Enums"]
+    | { schema: keyof Database },
+  EnumName extends DefaultSchemaEnumNameOrOptions extends {
+    schema: keyof Database
+  }
+    ? keyof Database[DefaultSchemaEnumNameOrOptions["schema"]]["Enums"]
+    : never = never,
+> = DefaultSchemaEnumNameOrOptions extends { schema: keyof Database }
+  ? Database[DefaultSchemaEnumNameOrOptions["schema"]]["Enums"][EnumName]
+  : DefaultSchemaEnumNameOrOptions extends keyof DefaultSchema["Enums"]
+    ? DefaultSchema["Enums"][DefaultSchemaEnumNameOrOptions]
+    : never
+
+export type CompositeTypes<
+  PublicCompositeTypeNameOrOptions extends
+    | keyof DefaultSchema["CompositeTypes"]
+    | { schema: keyof Database },
+  CompositeTypeName extends PublicCompositeTypeNameOrOptions extends {
+    schema: keyof Database
+  }
+    ? keyof Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"]
+    : never = never,
+> = PublicCompositeTypeNameOrOptions extends { schema: keyof Database }
+  ? Database[PublicCompositeTypeNameOrOptions["schema"]]["CompositeTypes"][CompositeTypeName]
+  : PublicCompositeTypeNameOrOptions extends keyof DefaultSchema["CompositeTypes"]
+    ? DefaultSchema["CompositeTypes"][PublicCompositeTypeNameOrOptions]
+    : never
+
+export const Constants = {
+  public: {
+    Enums: {},
+  },
+} as const
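
These generated helpers resolve row and insert types by table name. A short sketch of typical usage (the import path is assumed):

// Sketch: consuming the generated types (module path is assumed).
import type { Tables, TablesInsert } from "@/utils/supabase/types";

// Row shape for the documents table, including the new is_processing flag.
type DocumentRow = Tables<"documents">;

// Payload accepted when inserting into documents.
const newDoc: TablesInsert<"documents"> = {
  file_name: "example.pdf",
  owner: "user-uuid",
  raw_file: "storage-object-id",
  is_processing: true,
};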