Spaces:
Running
Running
File size: 1,531 Bytes
b3702ea a84424d 5e0955d b3702ea |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import gradio as gr
import requests
from bs4 import BeautifulSoup
from markdownify import MarkdownConverter
def md(soup, **options):
    """Convert an already-parsed BeautifulSoup node to Markdown.

    Args:
        soup: The parsed node (or whole document) to convert.
        **options: Keyword options forwarded unchanged to
            ``MarkdownConverter``.

    Returns:
        Whatever ``MarkdownConverter.convert_soup`` returns for ``soup``.
    """
    converter = MarkdownConverter(**options)
    return converter.convert_soup(soup)
def main_fn(url: str, check: list[str]) -> str:
    """Download the page at ``url`` and return its content as Markdown.

    ``<script>`` and ``<style>`` elements are removed before conversion.
    The ``<main>`` element is converted when the page has one; otherwise
    the ``<body>`` element is used, and if neither exists the whole parsed
    document is converted.

    Args:
        url: Address of the page to download.
        check: Tag names handed to the converter as its ``strip`` option
            (the "Ignore tags" selection from the UI).

    Returns:
        The Markdown text produced by ``md``.

    Raises:
        requests.RequestException: If the download fails or times out.
    """
    # Without a timeout an unresponsive host would block this request forever.
    response = requests.get(url, timeout=30)

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.text, "html.parser")

    # Drop script/style elements entirely. The previous code referenced
    # ``t.clear`` without calling it, so nothing was ever removed.
    for element in soup.find_all(["script", "style"]):
        element.decompose()

    # Prefer <main>, then <body>; fall back to the whole document so that a
    # fragment without either element is still converted instead of passing
    # None to the converter.
    root = soup.find("main") or soup.find("body") or soup

    # Apply the ignore-tags selection on every path, not only for <main>.
    return md(root, strip=check)
# HTML shown under the title: English and Japanese descriptions plus a link
# to the newer version of this Space.
_DESCRIPTION = """<div style="width: fit-content; margin: 0 auto;">Gets HTML given by URL and converts it to Markdown.Does not support dynamically generated HTML such as React.</div>
<div style="width: fit-content; margin: 0 auto;">URLで与えたHTMLを取得してMarkdownに変換します。Reactなどの動的に生成されるHTMLには対応していません</div>
<div style="width: fit-content; margin: 0 auto;"><a href="https://huggingface.co/spaces/moritalous/url-to-markdown-v2">New Version is here.</a></div>"""

# First argument of main_fn: the address of the page to convert.
_url_input = gr.Text(label="URL", placeholder="https://*****")

# Second argument of main_fn: the tag names the user wants ignored.
_ignore_tags_input = gr.CheckboxGroup(
    label="Ignore tags(無視するタグ)",
    choices=["a", "img", "noscript"],
    value=["a", "img"],
)

# Receives the Markdown string returned by main_fn.
_markdown_output = gr.TextArea(label="Markdown", show_copy_button=True)

demo = gr.Interface(
    fn=main_fn,
    inputs=[_url_input, _ignore_tags_input],
    outputs=[_markdown_output],
    title="URL to Markdown",
    description=_DESCRIPTION,
    allow_flagging="never",
)

demo.launch()
|