"""Gradio app that fetches a web page and converts its HTML to Markdown."""

import gradio as gr
import requests
from bs4 import BeautifulSoup
from markdownify import MarkdownConverter

# Upper bound (seconds) on the HTTP fetch so an unresponsive host cannot
# block a request forever.
REQUEST_TIMEOUT_SECONDS = 30

# Tags whose contents are code/styling, never page text; removed before
# conversion so they do not leak into the Markdown output.
NON_CONTENT_TAGS = ["script", "style"]


def md(soup, **options) -> str:
    """Convert an already-parsed BeautifulSoup node to Markdown.

    Args:
        soup: The parsed node (whole document or a sub-tree) to convert.
        **options: Keyword options forwarded unchanged to
            ``MarkdownConverter`` (for example ``strip=[...]``).

    Returns:
        The Markdown text produced by the converter.
    """
    return MarkdownConverter(**options).convert_soup(soup)


def main_fn(url: str, check: list[str]) -> str:
    """Download ``url`` and return the page content as Markdown.

    ``<script>`` and ``<style>`` elements are removed first. The ``<main>``
    element is converted when the page has one; otherwise ``<body>`` is
    used, and if neither exists the whole parsed document is converted.

    Args:
        url: Address of the page to fetch.
        check: Tag names selected in the UI that are passed to the
            converter as its ``strip`` option.

    Returns:
        The Markdown rendering of the selected part of the page.

    Raises:
        requests.RequestException: If the URL is invalid, the request
            fails, or it exceeds ``REQUEST_TIMEOUT_SECONDS``.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")

    # decompose() must actually be *called*; it removes each tag together
    # with its contents from the tree.
    for element in soup.find_all(NON_CONTENT_TAGS):
        element.decompose()

    root = soup.find("main")
    if root is None:
        root = soup.find("body")
    if root is None:
        # Fragment without <main> or <body>: convert everything we parsed.
        root = soup

    # Apply the user's tag selection regardless of which root was chosen.
    return md(root, strip=check)


demo = gr.Interface(
    main_fn,
    title="URL to Markdown",
    description="""
Gets HTML given by URL and converts it to Markdown.Does not support dynamically generated HTML such as React.
URLで与えたHTMLを取得してMarkdownに変換します。Reactなどの動的に生成されるHTMLには対応していません
New Version is here.""",
    inputs=[
        gr.Text(label="URL", placeholder="https://*****"),
        gr.CheckboxGroup(
            label="Ignore tags(無視するタグ)",
            choices=["a", "img", "noscript"],
            value=["a", "img"],
        ),
    ],
    outputs=[gr.TextArea(label="Markdown", show_copy_button=True)],
    allow_flagging="never",
)

demo.launch()