acecalisto3 commited on
Commit
34bc10c
1 Parent(s): 6bf8a84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +269 -102
app.py CHANGED
@@ -4,99 +4,151 @@ import csv
4
  import time
5
  import hashlib
6
  import logging
 
 
7
  import gradio as gr
8
  from selenium import webdriver
9
  from selenium.webdriver.chrome.service import Service
10
  from selenium.webdriver.chrome.options import Options
 
11
  from webdriver_manager.chrome import ChromeDriverManager
12
  from huggingface_hub import InferenceClient
13
 
14
  # Configure logging
15
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
 
 
 
 
16
 
17
  # Define constants
18
  PREFIX = "Task started at {date_time_str}. Purpose: {purpose}"
19
  TASK_PROMPT = "Current task: {task}. History:\n{history}"
20
 
21
- # Define current date/time
22
- date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
23
-
24
  # Define purpose
25
  purpose = """
26
- You go to Culvers sites, you continuously seek changes on them since your last observation.
27
- Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data.
28
  """
29
 
30
- # Define history
31
  history = []
32
-
33
- # Define current task
34
  current_task = None
 
 
35
 
36
- # Default file path
37
- default_file_path = "user/app/scraped_data/culver/culvers_changes.csv"
38
 
39
  # Ensure the directory exists
40
- os.makedirs(os.path.dirname(default_file_path), exist_ok=True)
41
 
42
- # Function to monitor URLs for changes
43
- def monitor_urls(storage_location, url1, url2, scrape_interval, content_type):
 
 
 
44
  global history
45
- urls = [url1, url2]
46
- previous_hashes = ["", ""]
47
-
48
- # Ensure the directory exists
49
- os.makedirs(os.path.dirname(storage_location), exist_ok=True)
50
-
51
- with open(storage_location, "w", newline='') as csvfile:
52
- csv_toolkit = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
53
- csv_toolkit.writeheader()
54
-
55
- options = Options()
56
- options.headless = True
57
- options.add_argument("--disable-gpu")
58
- options.add_argument("--no-sandbox")
59
- options.add_argument("--disable-dev-shm-usage")
60
-
61
- with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
62
- try:
63
- while True:
64
- for i, url in enumerate(urls):
65
- try:
66
- driver.get(url)
67
- time.sleep(2) # Wait for the page to load
68
- if content_type == "text":
69
- current_content = driver.page_source
70
- elif content_type == "media":
71
- current_content = driver.find_elements_by_tag_name("img")
72
- else:
73
- current_content = driver.page_source
74
-
75
- current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
76
-
77
- if current_hash != previous_hashes[i]:
78
- previous_hashes[i] = current_hash
79
- date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
80
- history.append(f"Change detected at {url} on {date_time_str}")
81
- csv_toolkit.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
82
- logging.info(f"Change detected at {url} on {date_time_str}")
83
- except Exception as e:
84
- logging.error(f"Error accessing {url}: {e}")
85
 
86
- time.sleep(scrape_interval * 60) # Check every scrape_interval minutes
87
- except KeyboardInterrupt:
88
- logging.info("Monitoring stopped by user.")
89
- finally:
90
- driver.quit()
91
-
92
- # Define main function to handle user input
93
- def handle_input(storage_location, url1, url2, scrape_interval, content_type):
94
- global current_task, history
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
 
 
 
 
 
 
 
96
  current_task = f"Monitoring URLs: {url1}, {url2}"
97
  history.append(f"Task started: {current_task}")
98
- monitor_urls(storage_location, url1, url2, scrape_interval, content_type)
99
- return TASK_PROMPT.format(task=current_task, history="\n".join(history))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
  # Define the chat response function
102
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
@@ -110,52 +162,167 @@ def respond(
110
  top_p,
111
  ):
112
  messages = [{"role": "system", "content": system_message}]
113
-
114
- for val in history:
115
- if val[0]:
116
- messages.append({"role": "user", "content": val[0]})
117
- if val[1]:
118
- messages.append({"role": "assistant", "content": val[1]})
119
-
120
  messages.append({"role": "user", "content": message})
121
-
122
  response = ""
123
-
124
- for message in client.chat_completion(
125
- messages,
126
- max_tokens=max_tokens,
127
- stream=True,
128
- temperature=temperature,
129
- top_p=top_p,
130
- ):
131
- token = message.choices[0].delta.content
132
-
133
- response += token
134
- yield response
 
 
 
135
 
136
  # Create Gradio interface
137
- demo = gr.ChatInterface(
138
- respond,
139
- additional_inputs=[
140
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
141
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
142
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
143
- gr.Slider(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  minimum=0.1,
145
  maximum=1.0,
146
  value=0.95,
147
  step=0.05,
148
- label="Top-p (nucleus sampling)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  ),
150
- gr.Textbox(value=default_file_path, label="Storage Location"),
151
- gr.Textbox(value="https://www.culver.k12.in.us/", label="URL 1"),
152
- gr.Textbox(value="https://www.facebook.com/CulverCommunitySchools", label="URL 2"),
153
- gr.Slider(minimum=1, maximum=60, value=5, step=1, label="Scrape Interval (minutes)"),
154
- gr.Radio(choices=["text", "media", "both"], value="text", label="Content Type"),
155
- ],
156
- title="Culvers Site Monitor and Chatbot",
157
- description="Monitor changes on Culvers' websites and log them into a CSV file. Also, chat with a friendly chatbot."
158
- )
 
 
 
159
 
160
  if __name__ == "__main__":
161
  demo.launch()
 
4
  import time
5
  import hashlib
6
  import logging
7
+ import threading
8
+ from pathlib import Path
9
  import gradio as gr
10
  from selenium import webdriver
11
  from selenium.webdriver.chrome.service import Service
12
  from selenium.webdriver.chrome.options import Options
13
+ from selenium.webdriver.common.by import By
14
  from webdriver_manager.chrome import ChromeDriverManager
15
  from huggingface_hub import InferenceClient
16
 
17
  # Configure logging
18
+ logging.basicConfig(
19
+ level=logging.INFO,
20
+ format='%(asctime)s - %(levelname)s - %(message)s',
21
+ handlers=[
22
+ logging.FileHandler("monitoring.log"),
23
+ logging.StreamHandler()
24
+ ]
25
+ )
26
 
27
  # Define constants
28
  PREFIX = "Task started at {date_time_str}. Purpose: {purpose}"
29
  TASK_PROMPT = "Current task: {task}. History:\n{history}"
30
 
 
 
 
31
  # Define purpose
32
  purpose = """
33
+ You monitor Culvers sites continuously, seeking changes since your last observation.
34
+ Any new changes are logged and dumped into a CSV, stored in your log folder at user/app/scraped_data.
35
  """
36
 
37
+ # Initialize history and task variables
38
  history = []
 
 
39
  current_task = None
40
+ monitoring_thread = None
41
+ stop_event = threading.Event()
42
 
43
+ # Default file path using pathlib for cross-platform compatibility
44
+ default_file_path = Path("user/app/scraped_data/culver/culvers_changes.csv")
45
 
46
  # Ensure the directory exists
47
+ default_file_path.parent.mkdir(parents=True, exist_ok=True)
48
 
49
def monitor_urls(storage_location, urls, scrape_interval, content_type, stop_event):
    """
    Poll *urls* for content changes and append each detection to a CSV log.

    Designed to run in a background thread; loops until *stop_event* is set.

    Args:
        storage_location: Path (str or Path) of the CSV file to log changes to.
        urls: List of URLs to watch.
        scrape_interval: Minutes to wait between polling rounds.
        content_type: "text", "media", or "both" — which page content to hash.
            Any other value falls back to the full page source.
        stop_event: threading.Event used to request a prompt shutdown.
    """
    global history
    previous_hashes = [""] * len(urls)
    storage_path = Path(storage_location)

    # Write the CSV header only when creating a brand-new log file, so an
    # existing log is appended to rather than clobbered.
    if not storage_path.exists():
        with storage_path.open("w", newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
            writer.writeheader()

    options = Options()
    # Selenium 4 removed the Options.headless property; the flag must be
    # passed as a command-line argument instead.
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

    try:
        while not stop_event.is_set():
            for i, url in enumerate(urls):
                try:
                    driver.get(url)
                    time.sleep(2)  # crude wait for the page to render
                    if content_type == "media":
                        current_content = _collect_image_sources(driver)
                    elif content_type == "both":
                        current_content = driver.page_source + _collect_image_sources(driver)
                    else:
                        # "text" and any unrecognized value hash the page source.
                        current_content = driver.page_source

                    current_hash = hashlib.md5(current_content.encode('utf-8')).hexdigest()

                    if current_hash != previous_hashes[i]:
                        previous_hashes[i] = current_hash
                        date_time = datetime.datetime.now()
                        date_time_str = date_time.strftime("%Y-%m-%d %H:%M:%S")
                        history_entry = f"Change detected at {url} on {date_time_str}"
                        history.append(history_entry)
                        with storage_path.open("a", newline='', encoding='utf-8') as csvfile:
                            writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
                            writer.writerow({
                                "date": date_time.strftime("%Y-%m-%d"),
                                "time": date_time.strftime("%H:%M:%S"),
                                "url": url,
                                "change": "Content changed"
                            })
                        logging.info(history_entry)
                except Exception as e:
                    logging.error(f"Error accessing {url}: {e}")
            # Sleep in one-second slices so stop_event can interrupt promptly
            # instead of blocking for the whole interval.
            for _ in range(scrape_interval * 60):
                if stop_event.is_set():
                    break
                time.sleep(1)
    except Exception as e:
        logging.error(f"Unexpected error in monitoring thread: {e}")
    finally:
        driver.quit()
        logging.info("Monitoring thread has been stopped.")


def _collect_image_sources(driver):
    """Concatenate the src attributes of all <img> tags on the current page.

    get_attribute() returns None for images without a src; joining None into
    a string raises TypeError, so missing values are skipped.
    """
    images = driver.find_elements(By.TAG_NAME, "img")
    return ''.join(img.get_attribute('src') or '' for img in images)
118
 
119
def start_monitoring(storage_location, url1, url2, scrape_interval, content_type):
    """
    Spawn the background thread that watches the two URLs for changes.

    Returns:
        A (status message, history list) pair for the Gradio UI.
    """
    global current_task, monitoring_thread, stop_event, history

    # Refuse to spawn a second monitor while one is still running.
    if monitoring_thread and monitoring_thread.is_alive():
        return "Monitoring is already running.", history

    history = []
    current_task = f"Monitoring URLs: {url1}, {url2}"
    history.append(f"Task started: {current_task}")
    logging.info(current_task)

    stop_event.clear()
    worker = threading.Thread(
        target=monitor_urls,
        args=(storage_location, [url1, url2], scrape_interval, content_type, stop_event),
        daemon=True
    )
    monitoring_thread = worker
    worker.start()
    return "Monitoring started.", history
139
+
140
def stop_monitoring():
    """
    Signal the monitoring thread to stop and wait for it to exit.

    Returns:
        A (status message, history list) pair for the Gradio UI.
    """
    global current_task, monitoring_thread, stop_event, history

    # Guard clause: nothing to do when no monitor thread is alive.
    if not (monitoring_thread and monitoring_thread.is_alive()):
        return "No monitoring task is currently running.", history

    stop_event.set()
    monitoring_thread.join()
    history.append("Monitoring stopped by user.")
    logging.info("Monitoring stopped by user.")
    current_task = None
    return "Monitoring stopped.", history
152
 
153
  # Define the chat response function
154
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
162
  top_p,
163
  ):
164
  messages = [{"role": "system", "content": system_message}]
165
+
166
+ for user_msg, assistant_msg in history:
167
+ if user_msg:
168
+ messages.append({"role": "user", "content": user_msg})
169
+ if assistant_msg:
170
+ messages.append({"role": "assistant", "content": assistant_msg})
171
+
172
  messages.append({"role": "user", "content": message})
173
+
174
  response = ""
175
+
176
+ try:
177
+ for msg in client.chat_completion(
178
+ messages,
179
+ max_tokens=max_tokens,
180
+ stream=True,
181
+ temperature=temperature,
182
+ top_p=top_p,
183
+ ):
184
+ token = msg.choices[0].delta.get("content", "")
185
+ response += token
186
+ yield response
187
+ except Exception as e:
188
+ logging.error(f"Error in chatbot response: {e}")
189
+ yield "An error occurred while generating the response."
190
 
191
# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Culvers Site Monitor and Chatbot")
    gr.Markdown(
        "Monitor changes on Culvers' websites and log them into a CSV file. "
        "Also, chat with a friendly chatbot."
    )

    with gr.Tab("Monitor"):
        with gr.Row():
            storage_location = gr.Textbox(
                value=str(default_file_path),
                label="Storage Location",
                placeholder="Path to CSV file where changes will be logged"
            )
        with gr.Row():
            url1 = gr.Textbox(
                value="https://www.culver.k12.in.us/",
                label="URL 1",
                placeholder="First URL to monitor"
            )
            url2 = gr.Textbox(
                value="https://www.facebook.com/CulverCommunitySchools",
                label="URL 2",
                placeholder="Second URL to monitor"
            )
        with gr.Row():
            scrape_interval = gr.Slider(
                minimum=1,
                maximum=60,
                value=5,
                step=1,
                label="Scrape Interval (minutes)"
            )
            content_type = gr.Radio(
                choices=["text", "media", "both"],
                value="text",
                label="Content Type"
            )
        with gr.Row():
            start_button = gr.Button("Start Monitoring")
            stop_button = gr.Button("Stop Monitoring")
        with gr.Row():
            monitoring_status = gr.Textbox(
                value="No active monitoring.",
                label="Monitoring Status",
                interactive=False
            )
        with gr.Row():
            monitoring_history = gr.Textbox(
                value="",
                label="Monitoring History",
                lines=10,
                interactive=False
            )

    with gr.Tab("Chatbot"):
        chatbot = gr.Chatbot(label="Chat with the Assistant")
        with gr.Row():
            system_message = gr.Textbox(
                value="You are a friendly Chatbot.",
                label="System Message",
                visible=False
            )
        with gr.Row():
            user_input = gr.Textbox(
                label="You:",
                placeholder="Type your message here..."
            )
            submit_button = gr.Button("Send")
        # Generation parameters
        max_tokens = gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens"
        )
        temperature = gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature"
        )
        top_p = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)"
        )

    # The monitor controls return (status string, history LIST); a Textbox
    # needs a string, so join the history before handing it to the UI.
    def _start(storage, u1, u2, interval, ctype):
        status, hist = start_monitoring(storage, u1, u2, interval, ctype)
        return status, "\n".join(hist)

    def _stop():
        status, hist = stop_monitoring()
        return status, "\n".join(hist)

    start_button.click(
        fn=_start,
        inputs=[storage_location, url1, url2, scrape_interval, content_type],
        outputs=[monitoring_status, monitoring_history],
        queue=False
    )

    stop_button.click(
        fn=_stop,
        inputs=None,
        outputs=[monitoring_status, monitoring_history],
        queue=False
    )

    # Stream the assistant's reply token-by-token into the chat window.
    # (The previous wiring returned a raw generator object as a component
    # value with duplicated outputs, which Gradio cannot render.)
    def _chat(message, chat_history, sys_msg, max_t, temp, tp):
        chat_history = chat_history + [(message, "")]
        for partial in respond(message, chat_history[:-1], sys_msg, max_t, temp, tp):
            chat_history[-1] = (message, partial)
            # Second output clears the input box once the reply starts.
            yield chat_history, ""

    user_input.submit(
        fn=_chat,
        inputs=[user_input, chatbot, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, user_input]
    )

    submit_button.click(
        fn=_chat,
        inputs=[user_input, chatbot, system_message, max_tokens, temperature, top_p],
        outputs=[chatbot, user_input]
    )

if __name__ == "__main__":
    demo.launch()