Spaces:
Runtime error
Runtime error
Synced repo using 'sync_with_huggingface' GitHub Action
Browse files - pages/1_URLs.py +10 -3
pages/1_URLs.py
CHANGED
@@ -30,8 +30,10 @@ def check_sitemap(url):
|
|
30 |
# Parse the content as XML
|
31 |
response = requests.get(url)
|
32 |
xml_content = etree.fromstring(response.content)
|
|
|
|
|
33 |
# Check for sitemap-specific elements
|
34 |
-
if
|
35 |
return True
|
36 |
except Exception as e:
|
37 |
st.error("Invalid sitemap!!")
|
@@ -40,9 +42,14 @@ def check_sitemap(url):
|
|
40 |
try:
|
41 |
response = requests.get(url)
|
42 |
# Perform additional checks specific to the website's structure or naming conventions
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
44 |
except Exception as e:
|
45 |
-
|
46 |
pass
|
47 |
|
48 |
return False
|
|
|
30 |
# Parse the content as XML
|
31 |
response = requests.get(url)
|
32 |
xml_content = etree.fromstring(response.content)
|
33 |
+
soup = BeautifulSoup(response.text, 'xml')
|
34 |
+
|
35 |
# Check for sitemap-specific elements
|
36 |
+
if soup.find_all('sitemap') or soup.find_all('urlset') or soup.find_all('sitemapindex'):
|
37 |
return True
|
38 |
except Exception as e:
|
39 |
st.error("Invalid sitemap!!")
|
|
|
42 |
try:
|
43 |
response = requests.get(url)
|
44 |
# Perform additional checks specific to the website's structure or naming conventions
|
45 |
+
xml_content = etree.fromstring(response.content)
|
46 |
+
soup = BeautifulSoup(response.text, 'xml')
|
47 |
+
|
48 |
+
# Check for sitemap-specific elements
|
49 |
+
if soup.find_all('sitemap') or soup.find_all('urlset') or soup.find_all('sitemapindex'):
|
50 |
+
return True
|
51 |
except Exception as e:
|
52 |
+
st.error("Invalid sitemap!!")
|
53 |
pass
|
54 |
|
55 |
return False
|