Update app.py
Browse files
app.py
CHANGED
@@ -44,7 +44,15 @@ def article_text_extractor(url: str):
|
|
44 |
title_text = soup.find_all(["h1"])
|
45 |
para_text = soup.find_all(["p"])
|
46 |
article_text = [result.text for result in para_text]
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
article = " ".join(article_text)
|
49 |
article = article.replace(".", ".<eos>")
|
50 |
article = article.replace("!", "!<eos>")
|
|
|
44 |
title_text = soup.find_all(["h1"])
|
45 |
para_text = soup.find_all(["p"])
|
46 |
article_text = [result.text for result in para_text]
|
47 |
+
|
48 |
+
try:
|
49 |
+
|
50 |
+
article_header = [result.text for result in title_text][0]
|
51 |
+
|
52 |
+
except:
|
53 |
+
|
54 |
+
article_header = ''
|
55 |
+
|
56 |
article = " ".join(article_text)
|
57 |
article = article.replace(".", ".<eos>")
|
58 |
article = article.replace("!", "!<eos>")
|