"""Gradio app that downloads a web page and converts its HTML to Markdown."""

import gradio as gr
import requests
from bs4 import BeautifulSoup
from markdownify import MarkdownConverter

# Upper bound, in seconds, for the HTTP request so that an unresponsive
# server cannot block the UI handler forever.
_REQUEST_TIMEOUT_SECONDS = 30


def md(soup, **options):
    """Convert a parsed HTML node to Markdown.

    Args:
        soup: A BeautifulSoup document or tag to convert.
        **options: Keyword options forwarded to ``MarkdownConverter``.

    Returns:
        The Markdown text returned by ``MarkdownConverter.convert_soup``.
    """
    return MarkdownConverter(**options).convert_soup(soup)


def main_fn(url: str, check: list[str]):
    """Fetch *url* and return its main content as Markdown.

    The ``<main>`` element is converted when present; otherwise ``<body>``;
    otherwise the whole parsed document. ``<script>`` and ``<style>``
    elements are removed before conversion.

    Args:
        url: Address of the page to download.
        check: Tag names selected in the UI; passed to the converter as
            its ``strip`` option.

    Returns:
        The Markdown text for the selected part of the page.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)

    # Parse the raw bytes so BeautifulSoup can honour a <meta charset>
    # declaration. Only trust the HTTP-level encoding when the server
    # actually sent a charset; otherwise requests falls back to ISO-8859-1
    # for text/* responses, which garbles non-Latin pages.
    content_type = response.headers.get("content-type", "")
    declared = response.encoding if "charset" in content_type.lower() else None
    soup = BeautifulSoup(
        response.content, "html.parser", from_encoding=declared
    )

    # Drop script/style elements so their source text does not end up in
    # the Markdown output.
    for tag in soup.find_all(["script", "style"]):
        tag.decompose()

    root = soup.find("main")
    if root is None:
        root = soup.find("body")
    if root is None:
        # Fragment or malformed page without <body>: convert everything.
        root = soup

    # Apply the user's tag selection regardless of which root was chosen.
    return md(root, strip=check)


# NOTE(review): the description below says Playwright is used, but the code
# visible in this file fetches pages with `requests` only, so dynamically
# rendered (e.g. React) pages would not be executed. Confirm and update the
# user-facing text if it is out of date.
demo = gr.Interface(
    main_fn,
    title="URL to Markdown V2",
    description="""
It gets the HTML given by the URL and converts it to Markdown. It uses Playwright, so it also supports dynamically generated HTML such as React.
URLで与えたHTMLを取得してMarkdownに変換します。Playwright を使用しているのでReactなどの動的に生成されるHTMLにも対応しています
""",
    inputs=[
        gr.Text(label="URL", placeholder="https://*****"),
        gr.CheckboxGroup(
            label="Ignore tags(無視するタグ)",
            choices=["a", "img", "noscript"],
            value=["a", "img"],
        ),
    ],
    outputs=[gr.TextArea(label="Markdown", show_copy_button=True)],
    allow_flagging="never",
)

demo.launch()