import { createClient } from "@/utils/supabase/server";
import { NextResponse } from "next/server";
import { Mistral } from "@mistralai/mistralai";
import pLimit from "p-limit";

// The Mistral client is configured from the MISTRAL_API_KEY environment variable.
const apiKey = process.env.MISTRAL_API_KEY;
const client = new Mistral({ apiKey });
const PROCESSING_PROMPT = `
You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.

The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content.
Any images should be included.
Do not return the Markdown as a code block, only as a raw string, without any new lines.

The Markdown should be human-readable and well-formatted.
`;
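/**
 * POST handler: accepts a multipart form with `file` (the PDF), `fileName`, and `id`
 * (the Supabase `documents` row to update), runs Mistral OCR over the file, cleans up
 * each page's Markdown with a chat model, and stores the result in `ocr_data`.
 */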
export async function POST(request: Request) {
  const supabase = await createClient();
  const formData = await request.formData();
  const file = formData.get("file") as File;
  const fileName = formData.get("fileName") as string;
  const id = formData.get("id") as string;

  // Upload the PDF to Mistral file storage so the OCR API can reference it.
  const uploaded_pdf = await client.files.upload({
    file: {
      fileName,
      content: file,
    },
    purpose: "ocr",
  });

  // Get a signed URL pointing at the uploaded file.
  const signedUrl = await client.files.getSignedUrl({
    fileId: uploaded_pdf.id,
  });

  // Run OCR over the whole document; the response contains one entry per page.
  const ocrResponse = await client.ocr.process({
    model: "mistral-ocr-latest",
    document: {
      type: "document_url",
      documentUrl: signedUrl.url,
    },
  });
  const limit = pLimit(1); // Limit to 1 concurrent request (adjust as needed)

  const promises: Promise<any>[] = [];

  for (const page of ocrResponse.pages) {
    const pagePromise = limit(async () => {
      // Ask the chat model to clean up this page's OCR Markdown.
      const response = await client.chat.complete({
        model: "mistral-small-latest",
        messages: [
          {
            role: "user",
            content: [
              {
                type: "text",
                // Include the page's raw OCR Markdown after the instructions so the
                // model has the page content to process.
                text: `${PROCESSING_PROMPT}\n\n${page.markdown}`,
              },
            ],
          },
        ],
      });

      if (!response.choices) {
        console.error("No choices in response");
        return;
      }

      // Collect the base64 payloads of any images extracted from this page.
      const imageData: { [key: string]: string } = {};

      if (page.images.length > 0) {
        for (const img of page.images) {
          imageData[img.id] = img.imageBase64!;
        }
      }

      if (response.choices[0].message.content) {
        const markdown = replaceImagesInMarkdown(
          response.choices[0].message.content.toString(),
          imageData
        );

        return {
          ...page,
          markdown,
        };
      } else {
        console.error("Message content is undefined");
      }
    });

    promises.push(pagePromise);
  }
  const results = await Promise.all(promises);
  // Drop pages that failed to process, then restore the original page order.
  const sortedResults = results
    .filter((page) => page !== undefined)
    .sort((a, b) => a.index - b.index);

  // Store the processed pages on the document row.
  const { error } = await supabase
    .from("documents")
    .update({
      ocr_data: sortedResults,
    })
    .eq("id", id);

  if (error) {
    console.error(error);
    return NextResponse.json({ error: error.message }, { status: 500 });
  }

  return NextResponse.json({
    id,
  });
}
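// A minimal sketch of how a client might call this route. The "/api/process" path,
// `pdfFile`, and `documentId` are assumptions, not defined in this file:
//
//   const body = new FormData();
//   body.append("file", pdfFile);
//   body.append("fileName", pdfFile.name);
//   body.append("id", documentId);
//   const res = await fetch("/api/process", { method: "POST", body });
//   const { id } = await res.json();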
// Minimal shape of an OCR response as consumed by getCombinedMarkdown below.
interface OCRResponse {
  pages: {
    markdown: string;
    images: { id: string; image_base64: string }[];
  }[];
}
/**
 * Replace image placeholders in markdown with base64-encoded images.
 *
 * @param markdownStr Markdown text containing image placeholders
 * @param imagesDict Dictionary mapping image IDs to base64 strings
 * @returns Markdown text with images replaced by base64 data
 */
function replaceImagesInMarkdown(
  markdownStr: string,
  imagesDict: { [key: string]: string }
): string {
  for (const [imgName, base64Str] of Object.entries(imagesDict)) {
    // Rewrite ![id](id) placeholders emitted by the OCR to embed the base64 data.
    markdownStr = markdownStr.replace(
      new RegExp(`!\\[${imgName}\\]\\(${imgName}\\)`, "g"),
      `![${imgName}](${base64Str})`
    );
  }
  return markdownStr;
}
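// For example, with imagesDict = { "img-0.jpeg": "<base64 data>" }, the placeholder
// ![img-0.jpeg](img-0.jpeg) in the Markdown becomes ![img-0.jpeg](<base64 data>).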
/**
 * Combine OCR text and images into a single markdown document.
 *
 * @param ocrResponse Response from OCR processing containing text and images
 * @returns Combined markdown string with embedded images
 */
function getCombinedMarkdown(ocrResponse: OCRResponse): string {
  const markdowns: string[] = [];
  // Extract images from each page, then replace placeholders with the actual images.
  for (const page of ocrResponse.pages) {
    const imageData: { [key: string]: string } = {};
    for (const img of page.images) {
      imageData[img.id] = img.image_base64;
    }
    markdowns.push(replaceImagesInMarkdown(page.markdown, imageData));
  }

  return markdowns.join("\n\n");
}
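// getCombinedMarkdown is not called by the route above. Example usage (hypothetical),
// assuming `ocr` is an object matching the OCRResponse shape:
//
//   const fullMarkdown = getCombinedMarkdown(ocr);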