refactor: update imports and improve TTS functionality with pause/resume support
This commit is contained in:
parent
2e2a0f28b4
commit
7f9bdee7f4
@ -115,7 +115,7 @@ export const synthesizeTTSAction = async (data: {
|
||||
},
|
||||
previous_text: data.previous_text,
|
||||
next_text: data.next_text,
|
||||
model_id: "eleven_multilingual_v2",
|
||||
model_id: "eleven_flash_v2_5", // use eleven_multilingual_v2 if this doesnt sound good
|
||||
}
|
||||
);
|
||||
const chunks: Buffer[] = [];
|
||||
|
@ -15,7 +15,7 @@ const client = new Mistral({ apiKey });
|
||||
const PROCESSING_PROMPT = `
|
||||
You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
|
||||
|
||||
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.**
|
||||
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** If there is a title to the document, it should be the first heading.
|
||||
Any images should be included.
|
||||
Do not return the Markdown as a code block, only as a raw string, without any new lines.
|
||||
|
||||
@ -49,18 +49,36 @@ Return the final result as a text object with the following structure (without c
|
||||
Do not return the text object as a code block, only as a raw string.
|
||||
`;
|
||||
|
||||
/**
 * Parses a citations JSON payload and percent-encodes every http(s) URL found
 * in each citation's `text`.
 *
 * The caller passes "{}" when a page response carries no citations section, so
 * a payload without a `citations` array is treated as "no citations" rather
 * than as a failure.
 *
 * @param citationsStr - JSON text, expected to look like `{ "citations": [...] }`.
 * @returns The citations with URL-encoded `text`; `[]` when there is no
 *          `citations` array or when the string is not valid JSON.
 */
function getCitations(citationsStr: string) {
  try {
    console.log("Parsing citations string:", citationsStr);
    const citationsData = JSON.parse(citationsStr);

    // Missing / non-array `citations` is a normal outcome, not an error to log.
    const rawCitations = citationsData?.citations;
    if (!Array.isArray(rawCitations)) {
      return [];
    }

    console.log("Sanitizing citations...");
    const sanitizedCitations = rawCitations.map((citation: any) => {
      // Leave entries without a string `text` untouched instead of letting one
      // malformed citation discard the whole list.
      if (typeof citation?.text !== "string") {
        return citation;
      }

      const sanitizedText = citation.text.replace(
        /(https?:\/\/[^\s]+)/g,
        (url: string) => encodeURI(url)
      );

      return {
        ...citation,
        text: sanitizedText,
      };
    });

    console.log("Sanitized citations:", sanitizedCitations);
    return sanitizedCitations;
  } catch (err) {
    // Reached for invalid JSON (or an unexpected failure while encoding).
    console.error("Error parsing or sanitizing citations:", err);
    return [];
  }
}
|
||||
|
||||
export async function POST(req: NextRequest) {
|
||||
console.log("Received POST request");
|
||||
|
||||
if (req.method === "OPTIONS") {
|
||||
console.log("Handling OPTIONS request");
|
||||
return new NextResponse(null, {
|
||||
headers: {
|
||||
...corsHeaders,
|
||||
@ -69,27 +87,16 @@ export async function POST(req: NextRequest) {
|
||||
});
|
||||
}
|
||||
|
||||
const formData = await req.formData();
|
||||
const accessToken = formData.get("access_token") as string;
|
||||
const refreshToken = formData.get("refresh_token") as string;
|
||||
|
||||
if (!formData.has("file") || !accessToken || !refreshToken) {
|
||||
return NextResponse.json(
|
||||
{
|
||||
error: "Missing required fields: file, access_token, or refresh_token",
|
||||
},
|
||||
{ status: 400 }
|
||||
);
|
||||
}
|
||||
|
||||
const supabase = await createClient();
|
||||
|
||||
const file = formData.get("file") as File;
|
||||
const fileName = file.name;
|
||||
const uuid = crypto.randomUUID();
|
||||
|
||||
try {
|
||||
// Authenticate the user
|
||||
console.log("Parsing form data...");
|
||||
const formData = await req.formData();
|
||||
const accessToken = formData.get("access_token") as string;
|
||||
const refreshToken = formData.get("refresh_token") as string;
|
||||
|
||||
console.log("Creating Supabase client...");
|
||||
const supabase = await createClient();
|
||||
|
||||
console.log("Authenticating user...");
|
||||
const {
|
||||
data: { user },
|
||||
error: sessionError,
|
||||
@ -99,54 +106,152 @@ export async function POST(req: NextRequest) {
|
||||
});
|
||||
|
||||
if (sessionError) {
|
||||
console.error("Failed to set session:", sessionError.message);
|
||||
throw new Error("Failed to set session: " + sessionError.message);
|
||||
}
|
||||
|
||||
if (!user) {
|
||||
console.error("User not authenticated");
|
||||
throw new Error("User not authenticated");
|
||||
}
|
||||
|
||||
// Upload the file to Supabase storage
|
||||
const { data: storageData, error: storageError } = await supabase.storage
|
||||
.from("documents")
|
||||
.upload(`${user.id}/${uuid}.pdf`, file);
|
||||
var reprocessing = false;
|
||||
var uuid = crypto.randomUUID();
|
||||
|
||||
if (storageError) {
|
||||
throw new Error("Failed to upload file: " + storageError.message);
|
||||
if (formData.has("id")) {
|
||||
console.log("Reprocessing document...");
|
||||
reprocessing = true;
|
||||
console.log("File ID found in form data.");
|
||||
|
||||
const docId = formData.get("id");
|
||||
console.log("Document ID:", docId, formData);
|
||||
const { data: documentData, error: documentError } = await supabase
|
||||
.from("documents")
|
||||
.select("*")
|
||||
.eq("id", docId!.toString())
|
||||
.single();
|
||||
|
||||
if (documentError) {
|
||||
console.error("Error fetching document record:", documentError);
|
||||
|
||||
throw new Error("Document record fetch failed");
|
||||
}
|
||||
|
||||
if (documentData) {
|
||||
await supabase
|
||||
.from("documents")
|
||||
.update({
|
||||
is_processing: true,
|
||||
})
|
||||
.eq("id", documentData.id);
|
||||
uuid = documentData.id;
|
||||
} else {
|
||||
console.error("Document record not found.");
|
||||
|
||||
throw new Error("Document record not found");
|
||||
}
|
||||
|
||||
const { data: fileData, error: fileError } = await supabase.storage
|
||||
.from("documents")
|
||||
.download(`${user.id}/${uuid}.pdf`);
|
||||
|
||||
if (fileError) {
|
||||
console.error("Error downloading file from storage:", fileError);
|
||||
|
||||
throw new Error("File download failed");
|
||||
}
|
||||
|
||||
console.log("File downloaded from storage:", fileData);
|
||||
|
||||
formData.set("file", fileData);
|
||||
}
|
||||
|
||||
// Insert document record
|
||||
const { error: docError } = await supabase.from("documents").insert({
|
||||
id: uuid,
|
||||
file_name: file.name,
|
||||
owner: user.id,
|
||||
raw_file: storageData.id,
|
||||
is_processing: true,
|
||||
});
|
||||
|
||||
if (docError) {
|
||||
throw new Error("Failed to insert document record: " + docError.message);
|
||||
if (
|
||||
!reprocessing &&
|
||||
(!formData.has("file") || !accessToken || !refreshToken)
|
||||
) {
|
||||
console.error(
|
||||
"Missing required fields: file, access_token, or refresh_token"
|
||||
);
|
||||
return NextResponse.json(
|
||||
{
|
||||
error:
|
||||
"Missing required fields: file, access_token, or refresh_token",
|
||||
},
|
||||
{ status: 400 }
|
||||
);
|
||||
}
|
||||
|
||||
// Upload file to Mistral
|
||||
let file = formData.get("file") as File;
|
||||
const fileName = file.name;
|
||||
|
||||
if (!reprocessing) {
|
||||
console.log("Generated UUID for file:", uuid);
|
||||
|
||||
console.log("Uploading file to Supabase storage...");
|
||||
const { data: storageData, error: storageError } = await supabase.storage
|
||||
.from("documents")
|
||||
.upload(`${user.id}/${uuid}.pdf`, file);
|
||||
|
||||
if (storageError) {
|
||||
console.error("Failed to upload file:", storageError.message);
|
||||
throw new Error("Failed to upload file: " + storageError.message);
|
||||
}
|
||||
|
||||
console.log("Inserting document record...");
|
||||
const { error: docError } = await supabase.from("documents").insert({
|
||||
id: uuid,
|
||||
file_name: file.name,
|
||||
owner: user.id,
|
||||
raw_file: storageData.id,
|
||||
is_processing: true,
|
||||
});
|
||||
|
||||
if (docError) {
|
||||
console.error("Failed to insert document record:", docError.message);
|
||||
throw new Error(
|
||||
"Failed to insert document record: " + docError.message
|
||||
);
|
||||
}
|
||||
} else {
|
||||
console.log("Reprocessing document...");
|
||||
|
||||
const { error: docError } = await supabase
|
||||
.from("documents")
|
||||
.update({
|
||||
is_processing: true,
|
||||
})
|
||||
.eq("id", uuid);
|
||||
if (docError) {
|
||||
console.error("Error updating document record:", docError);
|
||||
throw new Error("Document record update failed");
|
||||
}
|
||||
console.log("Document record updated successfully.");
|
||||
}
|
||||
|
||||
console.log("Uploading file to Mistral...");
|
||||
const uploadedPdf = await client.files.upload({
|
||||
file: { fileName, content: file },
|
||||
file: { fileName: `${uuid}.pdf`, content: file },
|
||||
purpose: "ocr",
|
||||
});
|
||||
|
||||
console.log("Getting signed URL from Mistral...");
|
||||
const signedUrl = await client.files.getSignedUrl({
|
||||
fileId: uploadedPdf.id,
|
||||
});
|
||||
|
||||
// Process OCR
|
||||
console.log("Processing OCR...");
|
||||
const ocrResponse = await client.ocr.process({
|
||||
model: "mistral-ocr-latest",
|
||||
document: { type: "document_url", documentUrl: signedUrl.url },
|
||||
includeImageBase64: true,
|
||||
});
|
||||
|
||||
console.log("Processing OCR pages...");
|
||||
const limit = pLimit(2);
|
||||
const promises = ocrResponse.pages.map((page) =>
|
||||
limit(async () => {
|
||||
console.log("Processing page:", page);
|
||||
const response = await client.chat.complete({
|
||||
model: "mistral-small-latest",
|
||||
messages: [
|
||||
@ -161,32 +266,34 @@ export async function POST(req: NextRequest) {
|
||||
const split = response.choices[0].message.content.split("---------");
|
||||
const content = split[0].trim();
|
||||
const citationsStr = split[1]?.trim() || "{}";
|
||||
console.log(citationsStr);
|
||||
console.log("Citations string:", citationsStr);
|
||||
|
||||
const citations = await getCitations(citationsStr);
|
||||
const citations = getCitations(citationsStr);
|
||||
|
||||
return {
|
||||
...page,
|
||||
markdown: content,
|
||||
citations,
|
||||
...page,
|
||||
markdown: content,
|
||||
citations,
|
||||
};
|
||||
})
|
||||
);
|
||||
|
||||
const results = await Promise.all(promises);
|
||||
|
||||
// Update document record with OCR data
|
||||
console.log("Updating document record with OCR data...");
|
||||
const { error: updateError } = await supabase
|
||||
.from("documents")
|
||||
.update({ ocr_data: results, is_processing: false })
|
||||
.eq("id", uuid);
|
||||
|
||||
if (updateError) {
|
||||
console.error("Failed to update document record:", updateError.message);
|
||||
throw new Error(
|
||||
"Failed to update document record: " + updateError.message
|
||||
);
|
||||
}
|
||||
|
||||
console.log("Document processed successfully");
|
||||
return NextResponse.json({
|
||||
message: "Document processed successfully",
|
||||
results,
|
||||
|
@ -23,6 +23,7 @@ export default function KokoroReader({ pages }: { pages: any[] }) {
|
||||
playInOrder,
|
||||
status,
|
||||
pause,
|
||||
resume,
|
||||
} = useTTS();
|
||||
|
||||
const [playing, setPlaying] = useState(false);
|
||||
@ -32,18 +33,17 @@ export default function KokoroReader({ pages }: { pages: any[] }) {
|
||||
}, [status === "ready"]);
|
||||
|
||||
// Play button handler: resumes audio when the TTS status is "paused" and the
// local `playing` flag is off; otherwise starts ordered playback from the
// current sentence (index 0 when `currentSentence` is falsy). Either way the
// local `playing` flag is switched on afterwards.
const play = () => {
  const canResume = !playing && status === "paused";

  if (canResume) {
    resume();
  } else {
    const startIndex = currentSentence || 0;
    playInOrder(startIndex);
  }

  setPlaying(true);
};
|
||||
|
||||
// Pause button handler: clears the local `playing` flag first, then pauses
// audio through the `pause` function obtained from `useTTS()`.
const paused = () => {
  setPlaying(false);
  pause();
};
|
||||
|
||||
return (
|
||||
|
@ -90,7 +90,7 @@ export default function MarkdownRenderer({
|
||||
<h3 className="text-lg font-medium mb-2 text-gray-300" {...props} />
|
||||
),
|
||||
h4: ({ node, ...props }) => (
|
||||
<h4 className="text-lg font-medium mb-2 text-gray-300" {...props} />
|
||||
<h4 className="text-lg font-bold mb-2 text-gray-300" {...props} />
|
||||
),
|
||||
p: ({ node, ...props }) => (
|
||||
<p className="leading-7 text-gray-200" {...props} />
|
||||
@ -147,7 +147,7 @@ export default function MarkdownRenderer({
|
||||
{...props}
|
||||
/>
|
||||
</PopoverTrigger>
|
||||
<PopoverContent className="w-56 overflow-hidden rounded-lg p-0">
|
||||
<PopoverContent className="w-auto max-w-3xl bg-gray-900 overflow-hidden rounded-lg p-0">
|
||||
<div className="p-4">
|
||||
<p>{citation.text}</p>
|
||||
</div>
|
||||
|
@ -29,7 +29,7 @@ interface TTSContextType {
|
||||
currentSentence: number;
|
||||
voices: any[];
|
||||
selectedSpeaker: string;
|
||||
status: "ready" | "running" | null;
|
||||
status: "ready" | "running" | "paused" | null;
|
||||
setSelectedSpeaker: (speaker: string) => void;
|
||||
setCurrentSentence: (index: number) => void;
|
||||
playSentence: (index: number) => void;
|
||||
@ -60,11 +60,16 @@ export const TTSProvider = ({
|
||||
|
||||
const [selectedSpeaker, setSelectedSpeaker] = useState("af_heart");
|
||||
const [voices, setVoices] = useState<any[]>([]);
|
||||
const [status, setStatus] = useState<"ready" | "running" | null>("ready");
|
||||
const [status, setStatus] = useState<"ready" | "running" | "paused" | null>(
|
||||
"ready"
|
||||
);
|
||||
|
||||
// Cache for preloaded audio
|
||||
const audioCache = useRef<Map<number, string>>(new Map());
|
||||
|
||||
// Currently processing TTS
|
||||
const [processing, setProcessing] = useState<number[]>([]);
|
||||
|
||||
// Preload audio for a range of sentences
|
||||
const preloadAudio = async (startIndex: number, range: number = 3) => {
|
||||
for (
|
||||
@ -72,9 +77,10 @@ export const TTSProvider = ({
|
||||
i < Math.min(sentences.length, startIndex + range);
|
||||
i++
|
||||
) {
|
||||
if (!audioCache.current.has(i)) {
|
||||
if (!audioCache.current.has(i) && !processing.includes(i)) {
|
||||
console.log(`Preloading audio for sentence ${i}: ${sentences[i]}`);
|
||||
try {
|
||||
setProcessing((prev) => [...prev, i]); // Add to processing
|
||||
const audioUrl = await generateTTS(sentences[i], i);
|
||||
audioCache.current.set(i, audioUrl); // Cache the audio URL
|
||||
} catch (error) {
|
||||
@ -140,7 +146,15 @@ export const TTSProvider = ({
|
||||
if (index < 0 || index >= sentences.length) return;
|
||||
setCurrentSentence(index);
|
||||
|
||||
// Introduce a flag to track whether playback should continue
|
||||
let shouldContinue = true;
|
||||
|
||||
for (let i = index; i < sentences.length; i++) {
|
||||
if (!shouldContinue) {
|
||||
console.log("Playback stopped or paused.");
|
||||
break;
|
||||
}
|
||||
|
||||
console.log("Playing sentence:", i, sentences[i]);
|
||||
try {
|
||||
await playSentence(i);
|
||||
@ -150,17 +164,22 @@ export const TTSProvider = ({
|
||||
break; // Stop playback on error
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the playback state when done
|
||||
setStatus("ready");
|
||||
};
|
||||
|
||||
// Pauses the element held in `audioRef` and records the "paused" status.
// Does nothing (status included) when no element is attached to the ref.
const pause = () => {
  const audio = audioRef.current;
  if (!audio) {
    return;
  }

  audio.pause();
  setStatus("paused");
};
|
||||
|
||||
// Continues playback on the element held in `audioRef`, but only when the
// current status is "paused"; then records the "running" status.
// Does nothing when no element is attached or the status is anything else.
const resume = () => {
  const audio = audioRef.current;
  if (!audio || status !== "paused") {
    return;
  }

  audio.play();
  setStatus("running");
};
|
||||
|
||||
@ -168,7 +187,10 @@ export const TTSProvider = ({
|
||||
if (audioRef.current) {
|
||||
audioRef.current.pause();
|
||||
audioRef.current.currentTime = 0;
|
||||
setStatus("ready"); // Update the status to ready
|
||||
}
|
||||
// Reset the playback flag
|
||||
shouldContinue = false;
|
||||
};
|
||||
|
||||
// Preload sentences when the current sentence changes
|
||||
|
@ -17,7 +17,7 @@ import {
|
||||
Upload,
|
||||
} from "lucide-react";
|
||||
|
||||
import { NavDocuments } from "@/components/nav-favorites";
|
||||
import { NavDocuments } from "@/components/nav-documents";
|
||||
import { NavMain } from "@/components/nav-main";
|
||||
import { NavSecondary } from "@/components/nav-secondary";
|
||||
import {
|
||||
|
@ -29,9 +29,11 @@ import {
|
||||
} from "@/components/ui/sidebar";
|
||||
import { createClient } from "@/utils/supabase/client";
|
||||
import { toast } from "sonner";
|
||||
import { SSE } from "sse.js";
|
||||
import { useEffect, useState } from "react";
|
||||
|
||||
export function NavDocuments({
|
||||
documents,
|
||||
documents: ogDocuments,
|
||||
}: {
|
||||
documents: {
|
||||
id: string;
|
||||
@ -43,6 +45,53 @@ export function NavDocuments({
|
||||
}) {
|
||||
const { isMobile } = useSidebar();
|
||||
const supabase = createClient();
|
||||
const [documents, setDocuments] = useState(ogDocuments);
|
||||
|
||||
useEffect(() => {
|
||||
// watch for changes in the documents table, update the state when it changes
|
||||
|
||||
// INSERT handler for the documents subscription: adds `payload.new` to the
// local document list.
// A row whose id is already in the list is ignored, so a repeated event (or a
// row that is already present in state) cannot produce a duplicate entry.
const handleRecordInserted = (payload: any) => {
  const newDocument = payload.new;
  setDocuments((prev) =>
    prev.some((doc) => doc.id === newDocument.id)
      ? prev
      : [...prev, newDocument]
  );
};
|
||||
// UPDATE handler for the documents subscription: swaps the entry whose id
// matches `payload.new` for the new row and leaves every other entry as is.
const handleRecordUpdated = (payload: any) => {
  const { new: incoming } = payload;

  setDocuments((current) =>
    current.map((existing) => {
      if (existing.id === incoming.id) {
        return incoming;
      }
      return existing;
    })
  );
};
|
||||
// DELETE handler for the documents subscription: drops the entry whose id
// matches `payload.old` from the local document list.
const handleRecordDeleted = (payload: any) => {
  const { old: removed } = payload;

  setDocuments((current) =>
    current.filter((existing) => !(existing.id === removed.id))
  );
};
|
||||
|
||||
const subscription = supabase
|
||||
.channel("documents")
|
||||
.on(
|
||||
"postgres_changes",
|
||||
{ event: "INSERT", schema: "public", table: "documents" },
|
||||
handleRecordInserted
|
||||
)
|
||||
.on(
|
||||
"postgres_changes",
|
||||
{ event: "UPDATE", schema: "public", table: "documents" },
|
||||
handleRecordUpdated
|
||||
)
|
||||
.on(
|
||||
"postgres_changes",
|
||||
{ event: "DELETE", schema: "public", table: "documents" },
|
||||
handleRecordDeleted
|
||||
)
|
||||
.subscribe();
|
||||
|
||||
return () => {
|
||||
subscription.unsubscribe();
|
||||
};
|
||||
}, [ogDocuments, supabase]);
|
||||
|
||||
return (
|
||||
<SidebarGroup className="group-data-[collapsible=icon]:hidden">
|
||||
@ -92,19 +141,33 @@ export function NavDocuments({
|
||||
session.data.session.refresh_token
|
||||
);
|
||||
|
||||
toast.promise(
|
||||
supabase.functions.invoke("process-document", {
|
||||
body: data,
|
||||
}),
|
||||
{
|
||||
loading: "Reprocessing document...",
|
||||
success: "Document reprocessed successfully",
|
||||
error: (err) => {
|
||||
console.error("Error reprocessing document:", err);
|
||||
return "Failed to reprocess document";
|
||||
},
|
||||
const eventSource = new SSE(`/api/process-document`, {
|
||||
payload: data,
|
||||
headers: {
|
||||
apikey: process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY!,
|
||||
Authorization: `Bearer ${process.env.NEXT_PUBLIC_SUPABASE_ANON_KEY}`,
|
||||
},
|
||||
method: "POST",
|
||||
});
|
||||
|
||||
toast.loading("Reprocessing document...");
|
||||
|
||||
eventSource.onmessage = (event) => {
|
||||
const message = JSON.parse(event.data);
|
||||
if (message.status === "success") {
|
||||
toast.success("Document reprocessed successfully");
|
||||
eventSource.close();
|
||||
} else if (message.status === "error") {
|
||||
toast.error("Failed to reprocess document");
|
||||
eventSource.close();
|
||||
}
|
||||
);
|
||||
};
|
||||
|
||||
eventSource.onerror = (err) => {
|
||||
console.error("SSE error:", err);
|
||||
toast.error("An error occurred while reprocessing");
|
||||
eventSource.close();
|
||||
};
|
||||
}}
|
||||
>
|
||||
<RefreshCw className="text-muted-foreground" />
|
||||
@ -119,12 +182,6 @@ export function NavDocuments({
|
||||
</DropdownMenu>
|
||||
</SidebarMenuItem>
|
||||
))}
|
||||
<SidebarMenuItem>
|
||||
<SidebarMenuButton className="text-sidebar-foreground/70">
|
||||
<MoreHorizontal />
|
||||
<span>More</span>
|
||||
</SidebarMenuButton>
|
||||
</SidebarMenuItem>
|
||||
</SidebarMenu>
|
||||
</SidebarGroup>
|
||||
);
|
@ -14,7 +14,7 @@ const client = new Mistral({
|
||||
const PROCESSING_PROMPT = `
|
||||
You are a document processing AI. Your task is to process the Markdown text scanned from a document page and return it in a clean and structured format.
|
||||
|
||||
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.**
|
||||
The textual page data should only be returned in valid Markdown format. Use proper headings and subheadings to structure the content. **Do not add headings if they do not exist in the original text.** If there is a title to the document, it should be the first heading.
|
||||
Any images should be included.
|
||||
Do not return the Markdown as a code block, only as a raw string, without any new lines.
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user