neuroread/components/MarkdownRenderer.tsx

267 lines
7.9 KiB
TypeScript

"use client";
import {
Popover,
PopoverTrigger,
PopoverContent,
} from "@/components/ui/popover";
import { useMemo } from "react";
import ReactMarkdown, { Components } from "react-markdown";
import rehypeRaw from "rehype-raw";
import { useTTS } from "./TTSProvider";
import rehypeHighlight from "@/lib/utils";
import { Database } from "@/utils/supabase/types";
import remarkGfm from "remark-gfm";
/**
 * Escapes a string so it can be embedded verbatim inside a regular expression.
 *
 * Every regex metacharacter — and additionally commas, `#`, and whitespace —
 * is prefixed with a backslash.
 *
 * @param text - Raw text to escape.
 * @returns The text with each special character preceded by `\`.
 */
function escapeRegExp(text: string) {
  const specialCharacters = /[-[\]{}()*+?.,\\^$|#\s]/g;
  return text.replace(specialCharacters, (matched) => `\\${matched}`);
}
/**
 * One page of OCR output, as this component reads it from a document's
 * `ocr_data` column (the column value is cast to `OCRData[]`).
 */
export type OCRData = {
/**
 * Page identifier. Displayed as the page of a citation.
 * NOTE(review): whether this is 0- or 1-based is not established here — confirm.
 */
index: number;
/** Images extracted from the page; looked up by matching `id` against a markdown image `src`. */
images: {
/** Identifier that the page markdown uses as the image `src`. */
id: string;
// Corner coordinates of the image region on the page. The renderer divides
// their differences by the page `dpi` and multiplies by 96 to size the image.
topLeftX: number;
topLeftY: number;
bottomRightX: number;
bottomRightY: number;
/** Passed directly as the rendered `<img>` `src` — presumably a base64 data URL; TODO confirm. */
imageBase64: string;
}[];
/** Markdown text of the page; all pages are concatenated for rendering. */
markdown: string;
/** Citations found on this page. */
citations: {
/** Citation body shown in the popover. */
text: string;
/** Citation label shown as "Reference …" in the popover. */
number: string;
}[];
/** Page metrics. Only `dpi` is read by the renderer in this file. */
dimensions: {
dpi: number;
// NOTE(review): units of width/height are not shown in this file — presumably
// in the same coordinate space as the image corners; confirm.
width: number;
height: number;
};
};
/**
 * Renders a document's OCR output as styled markdown.
 *
 * All pages are concatenated into a single markdown string, with a generated
 * `###### Page N` heading after each page that is rendered as a
 * "Page N of M" divider. Image references are resolved against the OCR image
 * data, `<sup>` references open a popover with the matching citation, and the
 * sentence currently selected by the TTS context is passed to the highlight
 * rehype plugin.
 *
 * @param document - The `documents` table row whose `ocr_data` holds the
 *   per-page OCR results. A missing or non-array `ocr_data` renders nothing.
 */
export default function MarkdownRenderer({
  document,
}: {
  document: Database["public"]["Tables"]["documents"]["Row"];
}) {
  // Obtain TTS info from context.
  // NOTE(review): assumes a TTSProvider wraps this component higher in the tree.
  const { currentSentence, sentences } = useTTS();

  // Determine the text to highlight.
  const textToHighlight = useMemo(() => {
    if (!sentences || sentences.length === 0) return "";
    return sentences[currentSentence] || "";
  }, [sentences, currentSentence]);

  // Setup rehype plugins including our highlight plugin. The tuple is cast
  // because its [plugin, options] shape is not inferred as a plugin entry.
  const rehypePlugins = useMemo(
    () => [rehypeRaw, [rehypeHighlight, { textToHighlight }] as any],
    [textToHighlight]
  );

  // `ocr_data` comes from stored JSON, so tolerate null / non-array values
  // instead of crashing on `.map`.
  const ocrData = document?.ocr_data as OCRData[] | null | undefined;
  const ocr = useMemo<OCRData[]>(
    () => (Array.isArray(ocrData) ? ocrData : []),
    [ocrData]
  );

  // Join all pages into one markdown string, appending a page separator
  // heading after each page's content.
  const rawContent = useMemo(
    () =>
      ocr
        .map(
          (page, position) =>
            `${page.markdown ?? ""}\n\n###### Page ${position + 1}\n\n`
        )
        .join("\n\n"),
    [ocr]
  );

  // Flatten the per-page citations into one list with sequential, unique
  // numbering across the whole document.
  const citations = useMemo(() => {
    const flattened: {
      text: string;
      page: number;
      index: string;
      number: number;
      inTextNumber: string;
    }[] = [];
    for (const page of ocr) {
      for (const citation of page.citations ?? []) {
        // The list length is the running 0-based position across all pages.
        const position = flattened.length;
        flattened.push({
          text: citation.text,
          page: page.index,
          index: position.toString(), // unique 0-based index across all pages
          number: position + 1, // 1-based numbering
          inTextNumber: citation.number,
        });
      }
    }
    return flattened;
  }, [ocr]);

  // Memoised so the renderer functions keep a stable identity across
  // re-renders (e.g. when the highlighted sentence changes). New identities
  // would make React remount every custom element and close open popovers.
  const components = useMemo<Components>(() => {
    const totalPages = ocr.length;
    return {
      h1: ({ node, ...props }) => (
        <h1 className="text-2xl font-semibold mb-4 text-white" {...props} />
      ),
      h2: ({ node, ...props }) => (
        <h2 className="text-xl font-medium mb-3 text-white" {...props} />
      ),
      h3: ({ node, ...props }) => (
        <h3 className="text-lg font-medium mb-2 text-gray-300" {...props} />
      ),
      h4: ({ node, ...props }) => (
        <h4 className="text-lg font-bold mb-2 text-gray-300" {...props} />
      ),
      h6: ({ node, ...props }) => {
        // Only the generated "Page N" separators become dividers; any other
        // h6 from the document itself is rendered as a normal heading.
        const separator = /^Page (\d+)$/.exec(
          String(props.children ?? "").trim()
        );
        if (!separator) {
          return <h6 {...props} />;
        }
        const pageNumber = Number(separator[1]);
        return (
          <div className="relative flex py-5 items-center">
            <div className="flex-grow border-t border-slate-800"></div>
            <span className="flex-shrink mx-4 text-gray-400">
              Page {pageNumber} of {totalPages}
            </span>
            <div className="flex-grow border-t border-slate-800"></div>
          </div>
        );
      },
      p: ({ node, ...props }) => (
        <p className="leading-7 text-gray-200 mb-3" {...props} />
      ),
      img: ({ node, ...props }) => {
        const { src, alt } = props;
        // Locate the page that owns the image directly, rather than mixing an
        // array position with the page's own `index` field.
        const page = ocr.find((candidate) =>
          candidate.images?.some((image) => image.id === src)
        );
        const img = page?.images.find((image) => image.id === src);
        if (!page || !img) return null; // Unknown image reference: render nothing

        // Convert the image extent from page units to CSS pixels (96 per inch).
        // Without a usable DPI, leave the size to the browser.
        const dpi = page.dimensions?.dpi;
        const hasDpi = typeof dpi === "number" && dpi > 0;
        const width = hasDpi
          ? ((img.bottomRightX - img.topLeftX) / dpi) * 96
          : undefined;
        const height = hasDpi
          ? ((img.bottomRightY - img.topLeftY) / dpi) * 96
          : undefined;
        return (
          <div className="w-full flex justify-center mb-4">
            <img
              className="rounded-lg shadow-sm"
              data-image
              src={img.imageBase64}
              alt={alt}
              width={width}
              height={height}
            />
          </div>
        );
      },
      a: ({ node, ...props }) => (
        <a className="text-blue-400 hover:underline" {...props} />
      ),
      strong: ({ node, ...props }) => (
        <strong className="text-gray-200 font-semibold" {...props} />
      ),
      blockquote: ({ node, ...props }) => (
        <blockquote
          className="italic border-l-4 pl-4 border-gray-600 text-gray-300"
          {...props}
        />
      ),
      code: ({ node, ...props }) => (
        <code
          className="bg-gray-800 rounded px-1 py-0.5 text-gray-200"
          {...props}
        />
      ),
      sup: ({ node, ...props }) => {
        // A superscript without content cannot be a reference.
        if (!props.children) {
          return <sup {...props} />;
        }
        const referenceNumber = String(props.children).trim();
        if (!referenceNumber) {
          return <sup {...props} data-error="no reference number" />;
        }
        const citation = citations.find(
          (c) =>
            c.index === referenceNumber ||
            c.number.toString() === referenceNumber
        );
        if (!citation) {
          return <sup {...props} data-error="no citation found" />;
        }
        return (
          <Popover>
            <PopoverTrigger asChild>
              <sup
                className="text-gray-200 cursor-pointer underline hover:cursor-pointer"
                {...props}
              />
            </PopoverTrigger>
            <PopoverContent className="w-auto max-w-3xl bg-gray-900 overflow-hidden rounded-lg p-0">
              <div className="p-4">
                <p>{citation.text}</p>
                <p className="text-sm text-gray-500">
                  Page {citation.page}, Reference {citation.inTextNumber}
                </p>
              </div>
            </PopoverContent>
          </Popover>
        );
      },
      table: ({ node, ...props }) => (
        <div className="relative overflow-x-auto shadow-md sm:rounded-lg">
          <table
            {...props}
            className="w-full text-sm text-left rtl:text-right text-gray-500 dark:text-gray-400"
          />
        </div>
      ),
      thead: ({ node, ...props }) => (
        <thead
          {...props}
          className="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-700 dark:text-gray-400"
        />
      ),
      tbody: ({ node, ...props }) => <tbody {...props} />,
      tr: ({ node, ...props }) => (
        <tr
          {...props}
          className="odd:bg-white odd:dark:bg-gray-900 even:bg-gray-50 even:dark:bg-gray-800 border-b dark:border-gray-700 border-gray-200"
        />
      ),
      th: ({ node, ...props }) => (
        <th
          {...props}
          scope="col"
          className="px-6 py-3 font-medium text-gray-900 whitespace-nowrap dark:text-white"
        />
      ),
      td: ({ node, ...props }) => <td {...props} className="px-6 py-4" />,
    };
  }, [ocr, citations]);

  return (
    <ReactMarkdown
      components={components}
      rehypePlugins={rehypePlugins}
      remarkPlugins={[remarkGfm]}
    >
      {rawContent}
    </ReactMarkdown>
  );
}