import gradio as gr

from ..docs import document_manager
from ..tabs.workflow import log
from ..utils import html_to_markdown, markdown_to_text


async def fetch_document(source: str) -> tuple[str, str]:
    """Fetch a document and render it as Markdown plus a plain-text version.

    The document is looked up with ``document_manager.get_doc(source)``. Its
    optional ``overline``, ``title``, ``underline`` and ``authors`` fields are
    turned into Markdown headings, followed by the document's ``content``
    passed through ``html_to_markdown``. All pieces are joined with blank
    lines.

    Args:
        source: Identifier handed to ``document_manager.get_doc``.

    Returns:
        A ``(markdown, text)`` pair, where ``text`` is the result of
        ``markdown_to_text`` applied to the assembled Markdown. When the
        lookup returns a falsy value, both elements are empty strings.

    Raises:
        Whatever ``doc["content"]`` raises when the key is absent (``KeyError``
        for a plain ``dict``): unlike the header fields, ``content`` is read
        unconditionally.
    """
    log.info("Fetching document %s", source)
    doc = await document_manager.get_doc(source)
    if not doc:
        # Nothing found: callers get empty Markdown and empty text.
        return "", ""

    overline = doc.get("overline")
    title = doc.get("title")
    underline = doc.get("underline")
    authors = doc.get("authors")
    content = doc["content"]

    # Header fields are optional; only the ones present (and truthy) are
    # emitted, in this fixed order, ahead of the body.
    pieces = []
    if overline:
        pieces.append(f"### {overline}")
    if title:
        pieces.append(f"# {title}")
    if underline:
        pieces.append(f"### {underline}")
    if authors:
        # NOTE(review): authors are joined with a bare comma (no space), so
        # the byline renders as "By A,B" -- confirm this is intended.
        pieces.append(f"#### By {','.join(authors)}")

    pieces.append(html_to_markdown(content))
    markdown = "\n\n".join(pieces)

    text = markdown_to_text(markdown)
    log.info("Successfully fetched document %s: %s chars", source, len(text))
    return markdown, text