"use client";

import {
  Popover,
  PopoverTrigger,
  PopoverContent,
} from "@/components/ui/popover";
import { useMemo } from "react";
import ReactMarkdown, { Components } from "react-markdown";
import rehypeRaw from "rehype-raw";
import { useTTS } from "./TTSProvider";
import rehypeHighlight from "@/lib/utils";
import { Database } from "@/utils/supabase/types";
import remarkGfm from "remark-gfm";

/**
 * Backslash-escapes every character of `text` that belongs to the set
 * `- [ ] { } ( ) * + ? . , \ ^ $ | #` or is whitespace, so the result can be
 * embedded in a `RegExp` pattern and matched literally.
 *
 * @param text - Raw text to escape.
 * @returns `text` with each matching character prefixed by a backslash.
 */
function escapeRegExp(text: string) {
  return text.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
}

/**
 * Shape that `MarkdownRenderer` assumes for each entry of a document's
 * `ocr_data` array (the value is cast to `OCRData[]`, not validated).
 */
export type OCRData = {
  /** Reported as the page of every citation found in this entry. */
  index: number;
  images: {
    id: string;
    topLeftX: number;
    topLeftY: number;
    bottomRightX: number;
    bottomRightY: number;
    imageBase64: string;
  }[];
  /** Markdown of this entry; all entries are joined into one document. */
  markdown: string;
  citations: {
    text: string;
    /** Reference number as it appears in the text of this entry. */
    number: string;
  }[];
  dimensions: {
    dpi: number;
    width: number;
    height: number;
  };
};

/**
 * Client component that takes a `documents` row, reads its OCR pages and
 * prepares them for markdown rendering, passing the sentence currently
 * reported by the TTS context to the highlight plugin.
 *
 * @param document - Row of the `documents` table whose `ocr_data` is rendered.
 */
export default function MarkdownRenderer({
  document,
}: {
  document: Database["public"]["Tables"]["documents"]["Row"];
}) {
  // Obtain TTS info from context.
  // TTSProvider is already wrapping this component higher in the tree.
  const { currentSentence, sentences } = useTTS();

  // Determine the text to highlight.
  const textToHighlight = useMemo(() => {
    if (!sentences || sentences.length === 0) return "";
    return sentences[currentSentence] || "";
  }, [sentences, currentSentence]);

  // Setup rehype plugins including our highlight plugin.
  const rehypePlugins = useMemo(
    () => [rehypeRaw, [rehypeHighlight, { textToHighlight }] as any],
    [textToHighlight]
  );

  // `ocr_data` is cast rather than validated; fall back to an empty page list
  // when it is missing or not an array so the page/citation passes below
  // iterate over nothing instead of throwing.
  const rawOcrData = document?.ocr_data;
  const ocr = (Array.isArray(rawOcrData) ? rawOcrData : []) as OCRData[];

  // Join all markdown content from the OCR data into a single string. Add page separators.
const rawContent = ocr
  .map((page) => page.markdown)
  .map((page, index) => {
    const pageIndex = index + 1; // 1-based index for pages
    // Each page's markdown is followed by a level-6 "Page N" heading.
    const pageSeparator = `\n\n###### Page ${pageIndex}\n\n`;
    return `${page}${pageSeparator}`;
  })
  .join("\n\n");

// Flat list of every citation of every page, numbered across the whole document.
const citations: {
  text: string;
  page: number;
  index: string;
  number: number;
  inTextNumber: string;
}[] = [];
const totalPages = ocr.length;
// `sentences` is treated as possibly missing where the highlight text is
// computed, so it is guarded the same way here.
const totalSentences = sentences?.length ?? 0;
// Number of citations collected from the pages processed so far.
let totalCitations = 0;
ocr.forEach((page) => {
  // each page has its own citations (1-N), so we need to map them correctly
  page.citations.forEach((citation, index) => {
    // Offset the per-page position by the citations of all previous pages.
    // The running total is advanced once per page (below), not once per
    // citation, so indices are contiguous and never collide across pages.
    const globalIndex = totalCitations + index;
    citations.push({
      text: citation.text,
      page: page.index,
      index: globalIndex.toString(), // unique index across all pages
      number: globalIndex + 1, // 1-based numbering
      inTextNumber: citation.number,
    });
  });
  totalCitations += page.citations.length;
});

const components: Components = {
  h1: ({ node, ...props }) => (
), h2: ({ node, ...props }) => ( ), h3: ({ node, ...props }) => ( ), h4: ({ node, ...props }) => ( ), h6: ({ node, ...props }) => { const text = props.children!.toString().split(" ")[1]; const pageIndex = parseInt(text) - 1; // Convert to 0-based index const page = ocr[pageIndex]; return (
),
sup: ({ node, ...props }) => {
// Check if the text contains a reference number
if (!props.children) {
return ;
}
const text = props.children!.toString();
const referenceNumber = text;
if (!referenceNumber) {
return ;
}
const citation = citations.find(
(c) =>
c.index === referenceNumber || c.number.toString() === referenceNumber
);
if (!citation) {
return ;
}
return (
{citation.text}
Page {citation.page}, Reference {citation.inTextNumber}