web_scraper

Running

App Files Community

Marcepelaez committed 26 days ago

Commit

6ef7dfb

•

1 Parent(s): 0f3572b

app

Browse files

Files changed (1) hide show

app.py +29 -4

app.py CHANGED Viewed

@@ -23,8 +23,28 @@ def apply_theme(theme):
         .stMarkdown {
             color: black;
         }
         </style>
         """, unsafe_allow_html=True)
     else:
         st.markdown("""
         <style>
@@ -41,14 +61,16 @@ def apply_theme(theme):
         }
         </style>
         """, unsafe_allow_html=True)
-def scrape_web_content(url, max_images):
     """
     Scrape the web content while preserving its original formatting
     Args:
         url (str): URL of the webpage
         max_images (int): Maximum number of images to download
     Returns:
         dict: Extracted content with text, HTML, and images
@@ -98,8 +120,11 @@ def scrape_web_content(url, max_images):
         for tag in soup(["script", "style", "meta", "link", "noscript"]):
             tag.decompose()
-        # Convert remaining soup to HTML string
-        formatted_html = str(soup)
         # Extract plain text for preview
         plain_text = soup.get_text(separator='\n', strip=True)
@@ -142,7 +167,7 @@ def main():
     if st.button("Scrape Content"):
         if url_input:
             # Scrape the content
-            scraped_content = scrape_web_content(url_input, max_images)
             if scraped_content:
                 st.success("Content successfully scraped!")

         .stMarkdown {
             color: black;
         }
+        /* Light theme for HTML content */
+        .light-theme {
+            background-color: white !important;
+            color: black !important;
+        }
+        .light-theme a {
+            color: #0066cc !important;
+        }
+        .light-theme h1, .light-theme h2, .light-theme h3,
+        .light-theme h4, .light-theme h5, .light-theme h6 {
+            color: #333 !important;
+        }
         </style>
         """, unsafe_allow_html=True)
+        return """
+        <div style="background-color: white; color: black; padding: 20px;">
+        <style>
+            body { background-color: white !important; color: black !important; }
+            a { color: #0066cc; }
+            h1, h2, h3, h4, h5, h6 { color: #333; }
+        </style>
+        """
     else:
         st.markdown("""
         <style>
         }
         </style>
         """, unsafe_allow_html=True)
+        return ""
+def scrape_web_content(url, max_images, theme):
     """
     Scrape the web content while preserving its original formatting
     Args:
         url (str): URL of the webpage
         max_images (int): Maximum number of images to download
+        theme (str): Selected theme (Claro/Oscuro)
     Returns:
         dict: Extracted content with text, HTML, and images
         for tag in soup(["script", "style", "meta", "link", "noscript"]):
             tag.decompose()
+        # Apply light theme styling if selected
+        theme_prefix = apply_theme(theme) if theme == "Claro" else ""
+        # Convert remaining soup to HTML string with theme prefix
+        formatted_html = theme_prefix + str(soup)
         # Extract plain text for preview
         plain_text = soup.get_text(separator='\n', strip=True)
     if st.button("Scrape Content"):
         if url_input:
             # Scrape the content
+            scraped_content = scrape_web_content(url_input, max_images, theme)
             if scraped_content:
                 st.success("Content successfully scraped!")