267 lines
7.9 KiB
TypeScript
267 lines
7.9 KiB
TypeScript
"use client";
|
|
import {
|
|
Popover,
|
|
PopoverTrigger,
|
|
PopoverContent,
|
|
} from "@/components/ui/popover";
|
|
import { useMemo } from "react";
|
|
import ReactMarkdown, { Components } from "react-markdown";
|
|
import rehypeRaw from "rehype-raw";
|
|
import { useTTS } from "./TTSProvider";
|
|
import rehypeHighlight from "@/lib/utils";
|
|
import { Database } from "@/utils/supabase/types";
|
|
import remarkGfm from "remark-gfm";
|
|
|
|
/**
 * Escapes `text` so it can be embedded in a `RegExp` source and match literally.
 *
 * Only regex syntax characters are backslash-escaped. Characters such as `,`,
 * `#` and whitespace are left untouched because `\,`, `\#` or `\ ` are invalid
 * escapes when the pattern is compiled with the `u` flag. A hyphen is emitted
 * as `\x2d`, which is a valid escape in every mode and stays literal even when
 * the result is placed inside a character class.
 *
 * @param text - Arbitrary text to match literally.
 * @returns A pattern fragment that matches `text` exactly.
 */
function escapeRegExp(text: string): string {
  return text.replace(/[|\\{}()[\]^$+*?.]/g, "\\$&").replace(/-/g, "\\x2d");
}
|
|
|
|
/** One image region reported for a page; `id` is what markdown image sources are matched against. */
type OCRImageRegion = {
  id: string;
  // Corner coordinates of the region; the renderer scales their difference by the page DPI.
  topLeftX: number;
  topLeftY: number;
  bottomRightX: number;
  bottomRightY: number;
  // Used verbatim as the rendered <img> `src`.
  imageBase64: string;
};

/** One citation listed for a page; `number` is the reference label shown to the reader. */
type OCRCitation = {
  text: string;
  number: string;
};

/** Page geometry; `dpi` is the divisor used when converting image regions to on-screen size. */
type OCRPageDimensions = {
  dpi: number;
  width: number;
  height: number;
};

/** OCR output for a single page of a document. */
export type OCRData = {
  index: number;
  images: OCRImageRegion[];
  markdown: string;
  citations: OCRCitation[];
  dimensions: OCRPageDimensions;
};
|
|
|
|
/** A citation flattened out of its page and numbered across the whole document. */
type FlatCitation = {
  text: string;
  page: number;
  /** 0-based position across all pages, as a string. */
  index: string;
  /** 1-based position across all pages. */
  number: number;
  /** The citation's own per-page reference label. */
  inTextNumber: string;
};

// Hoisted so the plugin list keeps the same identity across renders.
const REMARK_PLUGINS = [remarkGfm];

/** Joins every page's markdown into one string, appending a "###### Page N" marker after each page. */
function joinPageMarkdown(pages: OCRData[]): string {
  return pages
    .map((page, position) => `${page.markdown}\n\n###### Page ${position + 1}\n\n`)
    .join("\n\n");
}

/** Flattens the per-page citation lists into one list with a running, document-wide numbering. */
function collectCitations(pages: OCRData[]): FlatCitation[] {
  const flat: FlatCitation[] = [];
  for (const page of pages) {
    // OCR data is an unchecked cast of stored JSON, so tolerate a missing list.
    for (const citation of page.citations ?? []) {
      // The running position is the only counter; adding the per-page index
      // on top of it would skip numbers and break uniqueness.
      const position = flat.length;
      flat.push({
        text: citation.text,
        page: page.index,
        index: position.toString(),
        number: position + 1,
        inTextNumber: citation.number,
      });
    }
  }
  return flat;
}

/**
 * Renders a document's OCR output as styled markdown.
 *
 * All pages are concatenated into a single markdown string (with a page marker
 * after each page), the sentence currently reported by the TTS context is
 * passed to the highlight rehype plugin, OCR image ids are resolved to their
 * base64 payloads, and `<sup>` reference numbers open a popover with the
 * matching citation.
 *
 * @param document - The `documents` row whose `ocr_data` holds the per-page OCR
 *   output. A missing or non-array `ocr_data` renders as an empty document.
 */
export default function MarkdownRenderer({
  document,
}: {
  document: Database["public"]["Tables"]["documents"]["Row"];
}) {
  // Obtain TTS info from context.
  // TTSProvider is already wrapping this component higher in the tree.
  const { currentSentence, sentences } = useTTS();

  // Determine the text to highlight.
  const textToHighlight = useMemo(() => {
    if (!sentences || sentences.length === 0) return "";
    return sentences[currentSentence] || "";
  }, [sentences, currentSentence]);

  // Setup rehype plugins including our highlight plugin.
  const rehypePlugins = useMemo(
    () => [rehypeRaw, [rehypeHighlight, { textToHighlight }] as any],
    [textToHighlight]
  );

  // Guard the cast: the column may be null or hold something other than an array.
  const ocrData = document?.ocr_data as OCRData[] | null | undefined;
  const ocr = useMemo<OCRData[]>(
    () => (Array.isArray(ocrData) ? ocrData : []),
    [ocrData]
  );

  // Derived data only changes with the OCR payload, not with the TTS sentence.
  const rawContent = useMemo(() => joinPageMarkdown(ocr), [ocr]);
  const citations = useMemo(() => collectCitations(ocr), [ocr]);

  // Memoized so the renderer functions keep a stable identity while only the
  // highlighted sentence changes; React treats a new function identity as a
  // new element type and remounts the subtree.
  const components = useMemo<Components>(() => {
    const totalPages = ocr.length;

    return {
      h1: ({ node, ...props }) => (
        <h1 className="text-2xl font-semibold mb-4 text-white" {...props} />
      ),
      h2: ({ node, ...props }) => (
        <h2 className="text-xl font-medium mb-3 text-white" {...props} />
      ),
      h3: ({ node, ...props }) => (
        <h3 className="text-lg font-medium mb-2 text-gray-300" {...props} />
      ),
      h4: ({ node, ...props }) => (
        <h4 className="text-lg font-bold mb-2 text-gray-300" {...props} />
      ),
      h6: ({ node, ...props }) => {
        // Page markers are generated as "###### Page N"; N is the second token.
        const pageNumber = Number.parseInt(
          String(props.children ?? "").split(" ")[1] ?? "",
          10
        );

        // Any other h6 (or one whose children are not plain text) is rendered
        // as a regular heading instead of "Page NaN of N".
        if (Number.isNaN(pageNumber)) return <h6 {...props} />;

        return (
          <div className="relative flex py-5 items-center">
            <div className="flex-grow border-t border-slate-800"></div>
            <span className="flex-shrink mx-4 text-gray-400">
              Page {pageNumber} of {totalPages}
            </span>
            <div className="flex-grow border-t border-slate-800"></div>
          </div>
        );
      },
      p: ({ node, ...props }) => (
        <p className="leading-7 text-gray-200 mb-3" {...props} />
      ),
      img: ({ node, ...props }) => {
        const { src, alt } = props;
        if (typeof src !== "string") return null;

        // Resolve the owning page by image id. The page object is used
        // directly: a page's `index` field is not guaranteed to equal its
        // position in the array.
        const page = ocr.find((p) =>
          p.images?.some((image) => image.id === src)
        );
        if (!page) return null; // No page owns an image with this id.

        const img = page.images.find((image) => image.id === src);
        if (!img) return null;

        // Scale the OCR region from the page's DPI to 96 pixels per inch.
        // Without a usable DPI, leave the size to the browser.
        const dpi = page.dimensions.dpi;
        const hasDpi = dpi > 0;
        const width = hasDpi
          ? ((img.bottomRightX - img.topLeftX) / dpi) * 96
          : undefined;
        const height = hasDpi
          ? ((img.bottomRightY - img.topLeftY) / dpi) * 96
          : undefined;

        return (
          <div className="w-full flex justify-center mb-4">
            <img
              className="rounded-lg shadow-sm"
              data-image
              src={img.imageBase64}
              alt={alt}
              width={width}
              height={height}
            />
          </div>
        );
      },
      a: ({ node, ...props }) => (
        <a className="text-blue-400 hover:underline" {...props} />
      ),
      strong: ({ node, ...props }) => (
        <strong className="text-gray-200 font-semibold" {...props} />
      ),
      blockquote: ({ node, ...props }) => (
        <blockquote
          className="italic border-l-4 pl-4 border-gray-600 text-gray-300"
          {...props}
        />
      ),
      code: ({ node, ...props }) => (
        <code
          className="bg-gray-800 rounded px-1 py-0.5 text-gray-200"
          {...props}
        />
      ),
      sup: ({ node, ...props }) => {
        // Nothing to look up without content.
        if (!props.children) {
          return <sup {...props} />;
        }

        const referenceNumber = String(props.children).trim();
        if (!referenceNumber) {
          return <sup {...props} data-error="no reference number" />;
        }

        // Accept either the 0-based index or the 1-based number; the first
        // citation in document order that matches wins.
        const citation = citations.find(
          (c) =>
            c.index === referenceNumber ||
            c.number.toString() === referenceNumber
        );

        if (!citation) {
          return <sup {...props} data-error="no citation found" />;
        }

        return (
          <Popover>
            <PopoverTrigger asChild>
              <sup
                className="text-gray-200 cursor-pointer underline hover:cursor-pointer"
                {...props}
              />
            </PopoverTrigger>
            <PopoverContent className="w-auto max-w-3xl bg-gray-900 overflow-hidden rounded-lg p-0">
              <div className="p-4">
                <p>{citation.text}</p>
                <p className="text-sm text-gray-500">
                  Page {citation.page}, Reference {citation.inTextNumber}
                </p>
              </div>
            </PopoverContent>
          </Popover>
        );
      },
      table: ({ node, ...props }) => (
        <div className="relative overflow-x-auto shadow-md sm:rounded-lg">
          <table
            {...props}
            className="w-full text-sm text-left rtl:text-right text-gray-500 dark:text-gray-400"
          />
        </div>
      ),
      thead: ({ node, ...props }) => (
        <thead
          {...props}
          className="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-700 dark:text-gray-400"
        />
      ),
      tbody: ({ node, ...props }) => <tbody {...props} />,
      tr: ({ node, ...props }) => (
        <tr
          {...props}
          className="odd:bg-white odd:dark:bg-gray-900 even:bg-gray-50 even:dark:bg-gray-800 border-b dark:border-gray-700 border-gray-200"
        />
      ),
      th: ({ node, ...props }) => (
        <th
          {...props}
          scope="col"
          className="px-6 py-3 font-medium text-gray-900 whitespace-nowrap dark:text-white"
        />
      ),
      td: ({ node, ...props }) => <td {...props} className="px-6 py-4" />,
    };
  }, [ocr, citations]);

  return (
    <ReactMarkdown
      components={components}
      rehypePlugins={rehypePlugins}
      remarkPlugins={REMARK_PLUGINS}
    >
      {rawContent}
    </ReactMarkdown>
  );
}
|