better document (re)processing
This commit is contained in:
parent
3eda271635
commit
16b552262e
@ -120,7 +120,7 @@ export default async function DocumentPage(props: { params: { id: string } }) {
|
|||||||
dangerouslySetInnerHTML={{ __html: String(processedContent) }}
|
dangerouslySetInnerHTML={{ __html: String(processedContent) }}
|
||||||
></div> */}
|
></div> */}
|
||||||
<div className="mx-auto px-12 py-20 gap-2">
|
<div className="mx-auto px-12 py-20 gap-2">
|
||||||
<MarkdownRenderer rawContent={rawContent} />
|
<MarkdownRenderer document={document} />
|
||||||
</div>
|
</div>
|
||||||
</SidebarInset>
|
</SidebarInset>
|
||||||
</TTSProvider>
|
</TTSProvider>
|
||||||
|
@ -44,6 +44,7 @@ export default async function Page() {
|
|||||||
<AppSidebar
|
<AppSidebar
|
||||||
documents={documents.map((d) => {
|
documents={documents.map((d) => {
|
||||||
return {
|
return {
|
||||||
|
id: d.id,
|
||||||
disabled: d.is_processing,
|
disabled: d.is_processing,
|
||||||
name: d.file_name,
|
name: d.file_name,
|
||||||
url: `/dashboard/documents/${d.id}`,
|
url: `/dashboard/documents/${d.id}`,
|
||||||
|
@ -1,170 +0,0 @@
|
|||||||
import { createClient } from "@/utils/supabase/server";
|
|
||||||
import { NextResponse } from "next/server";
|
|
||||||
import { Mistral } from "@mistralai/mistralai";
|
|
||||||
import { redirect } from "next/navigation";
|
|
||||||
import { ChatCompletionChoice } from "@mistralai/mistralai/models/components";
|
|
||||||
import pLimit from "p-limit";
|
|
||||||
|
|
||||||
const apiKey = process.env.MISTRAL_API_KEY;
|
|
||||||
const client = new Mistral({ apiKey: apiKey });
|
|
||||||
|
|
||||||
const PROCESSING_PROMPT = `
|
|
||||||
You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
|
|
||||||
|
|
||||||
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content.
|
|
||||||
Any images should be included.
|
|
||||||
Do not return the Markdown as a code block, only as a raw string, without any new lines.
|
|
||||||
|
|
||||||
The Markdown should be human-readable and well-formatted.
|
|
||||||
`;
|
|
||||||
|
|
||||||
export async function POST(request: Request) {
|
|
||||||
const supabase = await createClient();
|
|
||||||
const formData = await request.formData();
|
|
||||||
const file = formData.get("file") as File;
|
|
||||||
const fileName = formData.get("fileName") as string;
|
|
||||||
const id = formData.get("id") as string;
|
|
||||||
|
|
||||||
const uploaded_pdf = await client.files.upload({
|
|
||||||
file: {
|
|
||||||
fileName,
|
|
||||||
content: file,
|
|
||||||
},
|
|
||||||
purpose: "ocr",
|
|
||||||
});
|
|
||||||
|
|
||||||
const signedUrl = await client.files.getSignedUrl({
|
|
||||||
fileId: uploaded_pdf.id,
|
|
||||||
});
|
|
||||||
|
|
||||||
const ocrResponse = await client.ocr.process({
|
|
||||||
model: "mistral-ocr-latest",
|
|
||||||
document: {
|
|
||||||
type: "document_url",
|
|
||||||
documentUrl: signedUrl.url,
|
|
||||||
},
|
|
||||||
});
|
|
||||||
|
|
||||||
const limit = pLimit(1); // Limit to 1 concurrent request (adjust as needed)
|
|
||||||
|
|
||||||
const promises: Promise<any>[] = [];
|
|
||||||
|
|
||||||
for (const page of ocrResponse.pages) {
|
|
||||||
const pagePromise = limit(async () => {
|
|
||||||
const response = await client.chat.complete({
|
|
||||||
model: "mistral-small-latest",
|
|
||||||
messages: [
|
|
||||||
{
|
|
||||||
role: "user",
|
|
||||||
content: [
|
|
||||||
{
|
|
||||||
type: "text",
|
|
||||||
text: PROCESSING_PROMPT,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
},
|
|
||||||
],
|
|
||||||
});
|
|
||||||
|
|
||||||
if (!response.choices) {
|
|
||||||
console.error("No choices in response");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const imageData: { [key: string]: string } = {};
|
|
||||||
|
|
||||||
if (page.images.length > 0) {
|
|
||||||
for (const img of page.images) {
|
|
||||||
imageData[img.id] = img.imageBase64!;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (response.choices[0].message.content) {
|
|
||||||
const markdown = replaceImagesInMarkdown(
|
|
||||||
response.choices[0].message.content.toString(),
|
|
||||||
imageData
|
|
||||||
);
|
|
||||||
|
|
||||||
return {
|
|
||||||
...page,
|
|
||||||
markdown,
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
console.error("Message content is undefined");
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
promises.push(pagePromise);
|
|
||||||
}
|
|
||||||
|
|
||||||
const results = await Promise.all(promises);
|
|
||||||
const sortedResults = results.sort((a, b) => a.index - b.index);
|
|
||||||
|
|
||||||
const { data, error } = await supabase
|
|
||||||
.from("documents")
|
|
||||||
.update({
|
|
||||||
ocr_data: sortedResults,
|
|
||||||
})
|
|
||||||
.eq("id", id);
|
|
||||||
if (error) {
|
|
||||||
console.error(error);
|
|
||||||
return NextResponse.json({ error: error.message }, { status: 500 });
|
|
||||||
}
|
|
||||||
return NextResponse.json({
|
|
||||||
id,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
interface OCRResponse {
|
|
||||||
pages: {
|
|
||||||
markdown: string;
|
|
||||||
images: { id: string; image_base64: string }[];
|
|
||||||
}[];
|
|
||||||
}
|
|
||||||
|
|
||||||
function replaceImagesInMarkdown(
|
|
||||||
markdownStr: string,
|
|
||||||
imagesDict: { [key: string]: string }
|
|
||||||
): string {
|
|
||||||
/**
|
|
||||||
* Replace image placeholders in markdown with base64-encoded images.
|
|
||||||
*
|
|
||||||
* Args:
|
|
||||||
* markdownStr: Markdown text containing image placeholders
|
|
||||||
* imagesDict: Dictionary mapping image IDs to base64 strings
|
|
||||||
*
|
|
||||||
* Returns:
|
|
||||||
* Markdown text with images replaced by base64 data
|
|
||||||
*/
|
|
||||||
for (const [imgName, base64Str] of Object.entries(imagesDict)) {
|
|
||||||
markdownStr = markdownStr.replace(
|
|
||||||
new RegExp(`!\\[${imgName}\\]\\(${imgName}\\)`, "g"),
|
|
||||||
``
|
|
||||||
);
|
|
||||||
}
|
|
||||||
return markdownStr;
|
|
||||||
}
|
|
||||||
|
|
||||||
function getCombinedMarkdown(ocrResponse: OCRResponse): string {
|
|
||||||
/**
|
|
||||||
* Combine OCR text and images into a single markdown document.
|
|
||||||
*
|
|
||||||
* Args:
|
|
||||||
* ocrResponse: Response from OCR processing containing text and images
|
|
||||||
*
|
|
||||||
* Returns:
|
|
||||||
* Combined markdown string with embedded images
|
|
||||||
*/
|
|
||||||
const markdowns: string[] = [];
|
|
||||||
// Extract images from page
|
|
||||||
for (const page of ocrResponse.pages) {
|
|
||||||
const imageData: { [key: string]: string } = {};
|
|
||||||
for (const img of page.images) {
|
|
||||||
imageData[img.id] = img.image_base64;
|
|
||||||
}
|
|
||||||
// Replace image placeholders with actual images
|
|
||||||
markdowns.push(replaceImagesInMarkdown(page.markdown, imageData));
|
|
||||||
}
|
|
||||||
|
|
||||||
return markdowns.join("\n\n");
|
|
||||||
}
|
|
@ -9,16 +9,29 @@ import ReactMarkdown, { Components } from "react-markdown";
|
|||||||
import rehypeRaw from "rehype-raw";
|
import rehypeRaw from "rehype-raw";
|
||||||
import { useTTS } from "./TTSProvider";
|
import { useTTS } from "./TTSProvider";
|
||||||
import rehypeHighlight from "@/lib/utils";
|
import rehypeHighlight from "@/lib/utils";
|
||||||
|
import { Database } from "@/utils/supabase/types";
|
||||||
|
|
||||||
// Utility to escape regex special characters:
|
// Utility to escape regex special characters:
|
||||||
function escapeRegExp(text: string) {
|
function escapeRegExp(text: string) {
|
||||||
return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
|
return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export type OCRData = {
|
||||||
|
index: number;
|
||||||
|
images: string[];
|
||||||
|
markdown: string;
|
||||||
|
citations: Record<string, string>;
|
||||||
|
dimensions: {
|
||||||
|
dpi: number;
|
||||||
|
width: number;
|
||||||
|
height: number;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
export default function MarkdownRenderer({
|
export default function MarkdownRenderer({
|
||||||
rawContent,
|
document,
|
||||||
}: {
|
}: {
|
||||||
rawContent: string;
|
document: Database["public"]["Tables"]["documents"]["Row"];
|
||||||
}) {
|
}) {
|
||||||
// Obtain TTS info from context.
|
// Obtain TTS info from context.
|
||||||
// TTSProvider is already wrapping this component higher in the tree.
|
// TTSProvider is already wrapping this component higher in the tree.
|
||||||
@ -36,6 +49,34 @@ export default function MarkdownRenderer({
|
|||||||
[textToHighlight]
|
[textToHighlight]
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const ocr = document?.ocr_data as OCRData[];
|
||||||
|
|
||||||
|
const rawContent = ocr.map((page) => page.markdown).join("\n") || "";
|
||||||
|
|
||||||
|
const citations: {
|
||||||
|
text: string;
|
||||||
|
page: number;
|
||||||
|
index: string;
|
||||||
|
number: number;
|
||||||
|
}[] = [];
|
||||||
|
const totalPages = ocr.length;
|
||||||
|
const totalSentences = sentences.length;
|
||||||
|
|
||||||
|
let totalCitations = 0;
|
||||||
|
ocr.forEach((page) => {
|
||||||
|
Object.entries(page.citations).forEach(([key, value]) => {
|
||||||
|
if (value) {
|
||||||
|
totalCitations++;
|
||||||
|
citations.push({
|
||||||
|
text: value,
|
||||||
|
page: page.index,
|
||||||
|
index: key,
|
||||||
|
number: Number(totalCitations),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
const components: Components = {
|
const components: Components = {
|
||||||
h1: ({ node, ...props }) => (
|
h1: ({ node, ...props }) => (
|
||||||
<h1 className="text-2xl font-semibold mb-4 text-white" {...props} />
|
<h1 className="text-2xl font-semibold mb-4 text-white" {...props} />
|
||||||
@ -77,7 +118,24 @@ export default function MarkdownRenderer({
|
|||||||
{...props}
|
{...props}
|
||||||
/>
|
/>
|
||||||
),
|
),
|
||||||
sup: ({ node, ...props }) => (
|
sup: ({ node, ...props }) => {
|
||||||
|
// Check if the text contains a reference number
|
||||||
|
const text = props.children!.toString();
|
||||||
|
|
||||||
|
const referenceNumber = text;
|
||||||
|
if (!referenceNumber) {
|
||||||
|
return <sup {...props} data-error="no reference number" />;
|
||||||
|
}
|
||||||
|
|
||||||
|
const citation = citations.find(
|
||||||
|
(c) => c.index === referenceNumber && c.page === page.index
|
||||||
|
);
|
||||||
|
|
||||||
|
if (!citation) {
|
||||||
|
return <sup {...props} data-error="no citation found" />;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (
|
||||||
// TODO: get the references from the document and display them in a popover
|
// TODO: get the references from the document and display them in a popover
|
||||||
<Popover>
|
<Popover>
|
||||||
<PopoverTrigger asChild>
|
<PopoverTrigger asChild>
|
||||||
@ -89,11 +147,12 @@ export default function MarkdownRenderer({
|
|||||||
<PopoverContent className="w-56 overflow-hidden rounded-lg p-0">
|
<PopoverContent className="w-56 overflow-hidden rounded-lg p-0">
|
||||||
<div className="p-4">
|
<div className="p-4">
|
||||||
{/* Replace with actual reference content */}
|
{/* Replace with actual reference content */}
|
||||||
<p>Reference content goes here.</p>
|
<p>{citation.text}</p>
|
||||||
</div>
|
</div>
|
||||||
</PopoverContent>
|
</PopoverContent>
|
||||||
</Popover>
|
</Popover>
|
||||||
),
|
);
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
@ -64,12 +64,6 @@ export default function UploadZone({ user }: { user?: { id: string } }) {
|
|||||||
eventSource.close();
|
eventSource.close();
|
||||||
});
|
});
|
||||||
|
|
||||||
// // Invoke the serverless function
|
|
||||||
// supabase.functions.invoke("process-document", {
|
|
||||||
// body,
|
|
||||||
// method: "POST",
|
|
||||||
// });
|
|
||||||
|
|
||||||
toast.info(
|
toast.info(
|
||||||
"Document is being processed in the background. You will be notified when it's ready."
|
"Document is being processed in the background. You will be notified when it's ready."
|
||||||
);
|
);
|
||||||
|
@ -33,7 +33,13 @@ export function AppSidebar({
|
|||||||
documents,
|
documents,
|
||||||
...props
|
...props
|
||||||
}: React.ComponentProps<typeof Sidebar> & {
|
}: React.ComponentProps<typeof Sidebar> & {
|
||||||
documents?: Array<{ name: string; url: string; emoji?: string }>;
|
documents: Array<{
|
||||||
|
id: string;
|
||||||
|
disabled?: boolean;
|
||||||
|
name: string;
|
||||||
|
url: string;
|
||||||
|
emoji?: string;
|
||||||
|
}>;
|
||||||
}) {
|
}) {
|
||||||
const data = {
|
const data = {
|
||||||
navMain: [
|
navMain: [
|
||||||
|
@ -6,6 +6,7 @@ import {
|
|||||||
Link,
|
Link,
|
||||||
LoaderCircle,
|
LoaderCircle,
|
||||||
MoreHorizontal,
|
MoreHorizontal,
|
||||||
|
RefreshCw,
|
||||||
StarOff,
|
StarOff,
|
||||||
Trash2,
|
Trash2,
|
||||||
} from "lucide-react";
|
} from "lucide-react";
|
||||||
@ -26,11 +27,14 @@ import {
|
|||||||
SidebarMenuItem,
|
SidebarMenuItem,
|
||||||
useSidebar,
|
useSidebar,
|
||||||
} from "@/components/ui/sidebar";
|
} from "@/components/ui/sidebar";
|
||||||
|
import { createClient } from "@/utils/supabase/client";
|
||||||
|
import { toast } from "sonner";
|
||||||
|
|
||||||
export function NavDocuments({
|
export function NavDocuments({
|
||||||
documents,
|
documents,
|
||||||
}: {
|
}: {
|
||||||
documents: {
|
documents: {
|
||||||
|
id: string;
|
||||||
disabled?: boolean;
|
disabled?: boolean;
|
||||||
name: string;
|
name: string;
|
||||||
url: string;
|
url: string;
|
||||||
@ -38,13 +42,14 @@ export function NavDocuments({
|
|||||||
}[];
|
}[];
|
||||||
}) {
|
}) {
|
||||||
const { isMobile } = useSidebar();
|
const { isMobile } = useSidebar();
|
||||||
|
const supabase = createClient();
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<SidebarGroup className="group-data-[collapsible=icon]:hidden">
|
<SidebarGroup className="group-data-[collapsible=icon]:hidden">
|
||||||
<SidebarGroupLabel>Documents</SidebarGroupLabel>
|
<SidebarGroupLabel>Documents</SidebarGroupLabel>
|
||||||
<SidebarMenu>
|
<SidebarMenu>
|
||||||
{documents.map((item) => (
|
{documents.map((item) => (
|
||||||
<SidebarMenuItem key={item.name} aria-disabled={item.disabled}>
|
<SidebarMenuItem key={item.id} aria-disabled={item.disabled}>
|
||||||
<SidebarMenuButton asChild disabled={item.disabled}>
|
<SidebarMenuButton asChild disabled={item.disabled}>
|
||||||
<a href={item.url} title={item.name}>
|
<a href={item.url} title={item.name}>
|
||||||
{item.disabled ? (
|
{item.disabled ? (
|
||||||
@ -67,18 +72,43 @@ export function NavDocuments({
|
|||||||
side={isMobile ? "bottom" : "right"}
|
side={isMobile ? "bottom" : "right"}
|
||||||
align={isMobile ? "end" : "start"}
|
align={isMobile ? "end" : "start"}
|
||||||
>
|
>
|
||||||
<DropdownMenuItem>
|
<DropdownMenuItem
|
||||||
<StarOff className="text-muted-foreground" />
|
onClick={async () => {
|
||||||
<span>Remove from Favorites</span>
|
const data = new FormData();
|
||||||
</DropdownMenuItem>
|
|
||||||
<DropdownMenuSeparator />
|
const session = await supabase.auth.getSession();
|
||||||
<DropdownMenuItem>
|
if (!session.data.session) {
|
||||||
<Link className="text-muted-foreground" />
|
toast.error("You are not logged in");
|
||||||
<span>Copy Link</span>
|
return;
|
||||||
</DropdownMenuItem>
|
}
|
||||||
<DropdownMenuItem>
|
|
||||||
<ArrowUpRight className="text-muted-foreground" />
|
data.append("id", item.id);
|
||||||
<span>Open in New Tab</span>
|
data.append(
|
||||||
|
"access_token",
|
||||||
|
session.data.session.access_token
|
||||||
|
);
|
||||||
|
data.append(
|
||||||
|
"refresh_token",
|
||||||
|
session.data.session.refresh_token
|
||||||
|
);
|
||||||
|
|
||||||
|
toast.promise(
|
||||||
|
supabase.functions.invoke("process-document", {
|
||||||
|
body: data,
|
||||||
|
}),
|
||||||
|
{
|
||||||
|
loading: "Reprocessing document...",
|
||||||
|
success: "Document reprocessed successfully",
|
||||||
|
error: (err) => {
|
||||||
|
console.error("Error reprocessing document:", err);
|
||||||
|
return "Failed to reprocess document";
|
||||||
|
},
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<RefreshCw className="text-muted-foreground" />
|
||||||
|
<span>Reprocess Document</span>
|
||||||
</DropdownMenuItem>
|
</DropdownMenuItem>
|
||||||
<DropdownMenuSeparator />
|
<DropdownMenuSeparator />
|
||||||
<DropdownMenuItem>
|
<DropdownMenuItem>
|
||||||
|
@ -32,11 +32,16 @@ Return the final result as a text object with the following structure (without c
|
|||||||
---------
|
---------
|
||||||
|
|
||||||
{
|
{
|
||||||
"citations": {
|
"citations": [
|
||||||
"1": "Citation text for reference 1",
|
{
|
||||||
"2": "Citation text for reference 2",
|
"number": 1, // The number as it appears in the text
|
||||||
// ... more citations
|
"text": "Citation text 1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"number": 2,
|
||||||
|
"text": "Citation text 2"
|
||||||
}
|
}
|
||||||
|
]
|
||||||
}
|
}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@ -97,12 +102,16 @@ Deno.serve(async (req) => {
|
|||||||
Deno.env.get("SUPABASE_ANON_KEY")
|
Deno.env.get("SUPABASE_ANON_KEY")
|
||||||
);
|
);
|
||||||
|
|
||||||
|
const supabaseServer = createClient(
|
||||||
|
Deno.env.get("SUPABASE_URL"),
|
||||||
|
Deno.env.get("SUPABASE_SERVICE_ROLE_KEY")
|
||||||
|
);
|
||||||
|
|
||||||
const formData = await req.formData();
|
const formData = await req.formData();
|
||||||
const file = formData.get("file");
|
|
||||||
const accessToken = formData.get("access_token");
|
const accessToken = formData.get("access_token");
|
||||||
const refreshToken = formData.get("refresh_token");
|
const refreshToken = formData.get("refresh_token");
|
||||||
const fileName = file.name;
|
var reprocessing = false;
|
||||||
const uuid = crypto.randomUUID();
|
var uuid = crypto.randomUUID();
|
||||||
|
|
||||||
const {
|
const {
|
||||||
data: { user },
|
data: { user },
|
||||||
@ -121,6 +130,101 @@ Deno.serve(async (req) => {
|
|||||||
throw new Error("Setting session failed");
|
throw new Error("Setting session failed");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (formData.has("id")) {
|
||||||
|
console.log("Reprocessing document...");
|
||||||
|
reprocessing = true;
|
||||||
|
console.log("File ID found in form data.");
|
||||||
|
sendEvent("status", {
|
||||||
|
message: "File ID found in form data.",
|
||||||
|
});
|
||||||
|
const docId = formData.get("id");
|
||||||
|
|
||||||
|
const { data: documentData, error: documentError } = await supabase
|
||||||
|
.from("documents")
|
||||||
|
.select("*")
|
||||||
|
.eq("id", docId)
|
||||||
|
.single();
|
||||||
|
|
||||||
|
if (documentError) {
|
||||||
|
console.error("Error fetching document record:", documentError);
|
||||||
|
sendEvent("error", {
|
||||||
|
message: "Error fetching document record",
|
||||||
|
error: documentError,
|
||||||
|
});
|
||||||
|
throw new Error("Document record fetch failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (documentData) {
|
||||||
|
await supabase
|
||||||
|
.from("documents")
|
||||||
|
.update({
|
||||||
|
is_processing: true,
|
||||||
|
})
|
||||||
|
.eq("id", documentData.id);
|
||||||
|
uuid = documentData.id;
|
||||||
|
} else {
|
||||||
|
console.error("Document record not found.");
|
||||||
|
sendEvent("error", {
|
||||||
|
message: "Document record not found",
|
||||||
|
});
|
||||||
|
throw new Error("Document record not found");
|
||||||
|
}
|
||||||
|
|
||||||
|
const { data: storageData, error: storageError } = await supabaseServer
|
||||||
|
.from("storage.objects")
|
||||||
|
.select("name")
|
||||||
|
.eq("id", documentData.raw_file)
|
||||||
|
.single();
|
||||||
|
|
||||||
|
if (storageError) {
|
||||||
|
console.error("Error fetching file name:", storageError);
|
||||||
|
sendEvent("error", {
|
||||||
|
message: "Error fetching file name",
|
||||||
|
error: storageError,
|
||||||
|
});
|
||||||
|
throw new Error("Storage data fetch failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
const { data: fileData, error: fileError } = await supabase.storage
|
||||||
|
.from("documents")
|
||||||
|
.download(storageData.name);
|
||||||
|
|
||||||
|
if (fileError) {
|
||||||
|
console.error("Error downloading file from storage:", fileError);
|
||||||
|
sendEvent("error", {
|
||||||
|
message: "Error downloading file from storage",
|
||||||
|
error: fileError,
|
||||||
|
});
|
||||||
|
throw new Error("File download failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("File downloaded from storage:", fileData);
|
||||||
|
sendEvent("status", {
|
||||||
|
message: "File downloaded from storage",
|
||||||
|
fileData,
|
||||||
|
});
|
||||||
|
|
||||||
|
formData.set("file", fileData);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!formData.has("file")) {
|
||||||
|
console.error("File not found in form data.");
|
||||||
|
sendEvent("error", {
|
||||||
|
message: "File not found in form data",
|
||||||
|
});
|
||||||
|
throw new Error("File not found");
|
||||||
|
}
|
||||||
|
if (!formData.has("access_token") || !formData.has("refresh_token")) {
|
||||||
|
console.error("Access token or refresh token not found in form data.");
|
||||||
|
sendEvent("error", {
|
||||||
|
message: "Access token or refresh token not found in form data",
|
||||||
|
});
|
||||||
|
throw new Error("Tokens not found");
|
||||||
|
}
|
||||||
|
|
||||||
|
const file = formData.get("file") as File;
|
||||||
|
const fileName = file.name;
|
||||||
|
|
||||||
console.log("Generated UUID:", uuid);
|
console.log("Generated UUID:", uuid);
|
||||||
sendEvent("status", {
|
sendEvent("status", {
|
||||||
message: "Generated UUID",
|
message: "Generated UUID",
|
||||||
@ -133,7 +237,9 @@ Deno.serve(async (req) => {
|
|||||||
user,
|
user,
|
||||||
});
|
});
|
||||||
|
|
||||||
const { data: storageData, error: storageError } = await supabase.storage
|
if (!reprocessing) {
|
||||||
|
const { data: storageData, error: storageError } =
|
||||||
|
await supabase.storage
|
||||||
.from("documents")
|
.from("documents")
|
||||||
.upload(`${user.id}/${uuid}.pdf`, file);
|
.upload(`${user.id}/${uuid}.pdf`, file);
|
||||||
|
|
||||||
@ -173,6 +279,31 @@ Deno.serve(async (req) => {
|
|||||||
sendEvent("status", {
|
sendEvent("status", {
|
||||||
message: "Document record inserted successfully",
|
message: "Document record inserted successfully",
|
||||||
});
|
});
|
||||||
|
} else {
|
||||||
|
console.log("Reprocessing document...");
|
||||||
|
sendEvent("status", {
|
||||||
|
message: "Reprocessing document",
|
||||||
|
});
|
||||||
|
|
||||||
|
const { error: docError } = await supabase
|
||||||
|
.from("documents")
|
||||||
|
.update({
|
||||||
|
is_processing: true,
|
||||||
|
})
|
||||||
|
.eq("id", uuid);
|
||||||
|
if (docError) {
|
||||||
|
console.error("Error updating document record:", docError);
|
||||||
|
sendEvent("error", {
|
||||||
|
message: "Error updating document record",
|
||||||
|
error: docError,
|
||||||
|
});
|
||||||
|
throw new Error("Document record update failed");
|
||||||
|
}
|
||||||
|
console.log("Document record updated successfully.");
|
||||||
|
sendEvent("status", {
|
||||||
|
message: "Document record updated successfully",
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
console.log("Uploading file to Mistral...");
|
console.log("Uploading file to Mistral...");
|
||||||
sendEvent("status", {
|
sendEvent("status", {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user