Enhance document processing by updating Markdown handling, adding processing status to uploads, and improving error messaging in the upload component

This commit is contained in:
Jack Merrill 2025-04-18 00:29:19 -04:00
parent 2a9139744a
commit 8f70d83785
No known key found for this signature in database
GPG Key ID: FD574AFF96E99636
6 changed files with 39 additions and 17 deletions

View File

@ -68,7 +68,7 @@ export default async function DocumentPage(props: { params: { id: string } }) {
const processedContent = await remark()
.use(remarkHtml)
.process(pages.join(" "));
.process(pages.join("\n"));
return (
<SidebarProvider>

View File

@ -30,7 +30,7 @@ export default async function Page() {
const { data: documents, error } = await supabase
.from("documents")
.select("id, file_name, created_at, owner")
.select("id, file_name, created_at, owner, is_processing")
.eq("owner", user.id)
.order("created_at", { ascending: false });
@ -44,6 +44,7 @@ export default async function Page() {
<AppSidebar
documents={documents.map((d) => {
return {
disabled: d.is_processing,
name: d.file_name,
url: `/dashboard/documents/${d.id}`,
emoji: "📄",

BIN
bun.lockb

Binary file not shown.

View File

@ -49,7 +49,7 @@ export default function UploadZone({ user }: { user?: { id: string } }) {
eventSource.addEventListener("error", (event) => {
console.error("SSE Error:", event);
toast.error("An error occurred while processing the document.", {
toast.error("An error occurred while processing the document", {
description: event.data || "Unknown error",
});
setUploading(false);

View File

@ -4,6 +4,7 @@ import {
ArrowUpRight,
FileText,
Link,
LoaderCircle,
MoreHorizontal,
StarOff,
Trash2,
@ -30,6 +31,7 @@ export function NavDocuments({
documents,
}: {
documents: {
disabled?: boolean;
name: string;
url: string;
emoji?: string;
@ -42,10 +44,14 @@ export function NavDocuments({
<SidebarGroupLabel>Documents</SidebarGroupLabel>
<SidebarMenu>
{documents.map((item) => (
<SidebarMenuItem key={item.name}>
<SidebarMenuButton asChild>
<SidebarMenuItem key={item.name} aria-disabled={item.disabled}>
<SidebarMenuButton asChild disabled={item.disabled}>
<a href={item.url} title={item.name}>
<span>{item.emoji ? item.emoji : <FileText />}</span>
{item.disabled ? (
<LoaderCircle className="animate-spin text-muted-foreground" />
) : (
<span>{item.emoji ? item.emoji : <FileText />}</span>
)}
<span>{item.name}</span>
</a>
</SidebarMenuButton>

View File

@ -20,18 +20,27 @@ Do not return the Markdown as a code block, only as a raw string, without any ne
No data or information should ever be removed, it should only be processed and formatted.
There are in-text citations/references in the text, remove them from the text and put them into an object where the key is the reference number and the value is the text.
There are in-text citations/references in the text, remove them from the text (**but most importantly, keep the reference number in the text. use a <sup></sup> tag**) and put them into an object where the key is the reference number and the value is the text.
The Markdown should be human-readable and well-formatted.
The Markdown should be human-readable and well-formatted. The markdown string should be properly sanitized and should not break a JSON parser when returned as the final format.
Return the final result as a text object with the following structure (without code block formatting):
"""
<processed markdown text>
---------
Return the final result as a JSON object with the following structure:
{
"markdown": "<processed_markdown>",
"citations": {
"1": "<citation_text_1>",
"2": "<citation_text_2>"
"1": "Citation text for reference 1",
"2": "Citation text for reference 2",
// ... more citations
}
}
"""
Do not return the text object as a code block, only as a raw string.
`;
Deno.serve(async (req) => {
if (req.method === "OPTIONS") {
@ -276,16 +285,22 @@ Deno.serve(async (req) => {
}
if (response.choices[0].message.content) {
const markdownResponse = JSON.parse(
response.choices[0].message.content.toString()
// remove any potential code block formatting from the content
console.log(
`[${page.index}] ${response.choices[0].message.content}`
);
const citations = markdownResponse.citations;
const markdown = markdownResponse.markdown;
const split =
response.choices[0].message.content.split("---------");
const content = split[0].trim();
const citationsStr = split[1]?.trim() || "{}";
const citations = JSON.parse(citationsStr).citations || {};
console.log("Generating Markdown for page:", page.index);
sendEvent("status", {
message: `Generating Markdown for page ${page.index}`,
});
const markdown = replaceImagesInMarkdown(markdown, imageData);
const markdown = replaceImagesInMarkdown(content, imageData);
return {
...page,