File size: 1,073 Bytes
c6fd5b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import gradio as gr

from ..docs import document_manager
from ..tabs.workflow import log
from ..utils import html_to_markdown, markdown_to_text


async def fetch_document(source: str) -> tuple[str, str]:
    """Fetch a document and render it as Markdown and as plain text.

    The document is looked up through ``document_manager.get_doc``. Its
    optional ``overline``, ``title``, ``underline`` and ``authors`` fields
    become Markdown headings placed above the body, which is obtained by
    passing ``doc["content"]`` through ``html_to_markdown``. The pieces are
    separated by blank lines.

    Args:
        source: Identifier handed to ``document_manager.get_doc``.

    Returns:
        A ``(markdown, text)`` pair, where ``text`` is the result of
        ``markdown_to_text`` applied to the assembled Markdown. Both items
        are empty strings when the lookup yields a falsy result.

    Raises:
        KeyError: If a document is returned but has no ``"content"`` key.
    """
    log.info("Fetching document %s", source)
    doc = await document_manager.get_doc(source)
    if not doc:
        return "", ""

    # Unlike the header fields, the body is mandatory: a missing key raises.
    body = doc["content"]

    # Optional header fields, in display order, with their Markdown prefix.
    heading_fields = (
        ("overline", "###"),
        ("title", "#"),
        ("underline", "###"),
    )
    pieces = []
    for key, marker in heading_fields:
        value = doc.get(key)
        if value:
            pieces.append(f"{marker} {value}")

    authors = doc.get("authors")
    if authors:
        # NOTE(review): assumes authors is an iterable of strings; a single
        # string here would be joined character by character -- confirm.
        pieces.append(f"#### By {','.join(authors)}")

    pieces.append(html_to_markdown(body))
    markdown = "\n\n".join(pieces)
    text = markdown_to_text(markdown)
    log.info("Successfully fetched document %s: %s chars", source, len(text))
    return markdown, text