Marcepelaez committed on
Commit
6ef7dfb
1 Parent(s): 0f3572b
Files changed (1) hide show
  1. app.py +29 -4
app.py CHANGED
@@ -23,8 +23,28 @@ def apply_theme(theme):
23
  .stMarkdown {
24
  color: black;
25
  }
 
 
 
 
 
 
 
 
 
 
 
 
26
  </style>
27
  """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
28
  else:
29
  st.markdown("""
30
  <style>
@@ -41,14 +61,16 @@ def apply_theme(theme):
41
  }
42
  </style>
43
  """, unsafe_allow_html=True)
 
44
 
45
- def scrape_web_content(url, max_images):
46
  """
47
  Scrape the web content while preserving its original formatting
48
 
49
  Args:
50
  url (str): URL of the webpage
51
  max_images (int): Maximum number of images to download
 
52
 
53
  Returns:
54
  dict: Extracted content with text, HTML, and images
@@ -98,8 +120,11 @@ def scrape_web_content(url, max_images):
98
  for tag in soup(["script", "style", "meta", "link", "noscript"]):
99
  tag.decompose()
100
 
101
- # Convert remaining soup to HTML string
102
- formatted_html = str(soup)
 
 
 
103
 
104
  # Extract plain text for preview
105
  plain_text = soup.get_text(separator='\n', strip=True)
@@ -142,7 +167,7 @@ def main():
142
  if st.button("Scrape Content"):
143
  if url_input:
144
  # Scrape the content
145
- scraped_content = scrape_web_content(url_input, max_images)
146
 
147
  if scraped_content:
148
  st.success("Content successfully scraped!")
 
23
  .stMarkdown {
24
  color: black;
25
  }
26
+ /* Light theme for HTML content */
27
+ .light-theme {
28
+ background-color: white !important;
29
+ color: black !important;
30
+ }
31
+ .light-theme a {
32
+ color: #0066cc !important;
33
+ }
34
+ .light-theme h1, .light-theme h2, .light-theme h3,
35
+ .light-theme h4, .light-theme h5, .light-theme h6 {
36
+ color: #333 !important;
37
+ }
38
  </style>
39
  """, unsafe_allow_html=True)
40
+ return """
41
+ <div style="background-color: white; color: black; padding: 20px;">
42
+ <style>
43
+ body { background-color: white !important; color: black !important; }
44
+ a { color: #0066cc; }
45
+ h1, h2, h3, h4, h5, h6 { color: #333; }
46
+ </style>
47
+ """
48
  else:
49
  st.markdown("""
50
  <style>
 
61
  }
62
  </style>
63
  """, unsafe_allow_html=True)
64
+ return ""
65
 
66
+ def scrape_web_content(url, max_images, theme):
67
  """
68
  Scrape the web content while preserving its original formatting
69
 
70
  Args:
71
  url (str): URL of the webpage
72
  max_images (int): Maximum number of images to download
73
+ theme (str): Selected theme (Claro/Oscuro)
74
 
75
  Returns:
76
  dict: Extracted content with text, HTML, and images
 
120
  for tag in soup(["script", "style", "meta", "link", "noscript"]):
121
  tag.decompose()
122
 
123
+ # Apply light theme styling if selected
124
+ theme_prefix = apply_theme(theme) if theme == "Claro" else ""
125
+
126
+ # Convert remaining soup to HTML string with theme prefix
127
+ formatted_html = theme_prefix + str(soup)
128
 
129
  # Extract plain text for preview
130
  plain_text = soup.get_text(separator='\n', strip=True)
 
167
  if st.button("Scrape Content"):
168
  if url_input:
169
  # Scrape the content
170
+ scraped_content = scrape_web_content(url_input, max_images, theme)
171
 
172
  if scraped_content:
173
  st.success("Content successfully scraped!")