sam2ai committed on
Commit
822d429
1 Parent(s): 659d528

Synced repo using 'sync_with_huggingface' Github Action

Browse files
Files changed (1) hide show
  1. pages/1_URLs.py +10 -3
pages/1_URLs.py CHANGED
@@ -30,8 +30,10 @@ def check_sitemap(url):
30
  # Parse the content as XML
31
  response = requests.get(url)
32
  xml_content = etree.fromstring(response.content)
 
 
33
  # Check for sitemap-specific elements
34
- if xml_content.tag == 'urlset' or xml_content.tag == 'sitemapindex':
35
  return True
36
  except Exception as e:
37
  st.error("Invalid sitemap!!")
@@ -40,9 +42,14 @@ def check_sitemap(url):
40
  try:
41
  response = requests.get(url)
42
  # Perform additional checks specific to the website's structure or naming conventions
43
- return True
 
 
 
 
 
44
  except Exception as e:
45
- # st.error("Invalid sitemap!!")
46
  pass
47
 
48
  return False
 
30
  # Parse the content as XML
31
  response = requests.get(url)
32
  xml_content = etree.fromstring(response.content)
33
+ soup = BeautifulSoup(response.text, 'xml')
34
+
35
  # Check for sitemap-specific elements
36
+ if soup.find_all('sitemap') or soup.find_all('urlset') or soup.find_all('sitemapindex'):
37
  return True
38
  except Exception as e:
39
  st.error("Invalid sitemap!!")
 
42
  try:
43
  response = requests.get(url)
44
  # Perform additional checks specific to the website's structure or naming conventions
45
+ xml_content = etree.fromstring(response.content)
46
+ soup = BeautifulSoup(response.text, 'xml')
47
+
48
+ # Check for sitemap-specific elements
49
+ if soup.find_all('sitemap') or soup.find_all('urlset') or soup.find_all('sitemapindex'):
50
+ return True
51
  except Exception as e:
52
+ st.error("Invalid sitemap!!")
53
  pass
54
 
55
  return False