acecalisto3 commited on
Commit
6bf8a84
1 Parent(s): 2bed3a1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +161 -0
app.py ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import csv
import datetime
import hashlib
import logging
import os
import time

import gradio as gr
from huggingface_hub import InferenceClient
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
# Configure logging: INFO level, timestamped messages on the root logger.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Define constants (templates filled in with str.format at use sites).
# NOTE(review): PREFIX is never referenced in this file — confirm it is used elsewhere.
PREFIX = "Task started at {date_time_str}. Purpose: {purpose}"
TASK_PROMPT = "Current task: {task}. History:\n{history}"

# Define current date/time — naive local time, captured once at import.
date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Define purpose (free-text description; not consumed by any code below).
purpose = """
You go to Culvers sites, you continuously seek changes on them since your last observation.
Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data.
"""

# Define history — shared module-level log, appended to by monitor_urls/handle_input.
history = []

# Define current task — set by handle_input when monitoring starts.
current_task = None

# Default file path, relative to the process working directory.
default_file_path = "user/app/scraped_data/culver/culvers_changes.csv"

# Ensure the directory exists before anything tries to write the CSV.
os.makedirs(os.path.dirname(default_file_path), exist_ok=True)
# Function to monitor URLs for changes
def monitor_urls(storage_location, url1, url2, scrape_interval, content_type):
    """Poll two URLs forever and append change records to a CSV file.

    Args:
        storage_location: Path of the CSV file to write ("w" mode — truncates).
        url1, url2: The two URLs to watch.
        scrape_interval: Minutes to sleep between polling rounds.
        content_type: "media" hashes the page's <img> sources; "text" (and any
            other value) hashes the full page source.

    Blocks until interrupted; logs and continues on per-URL errors.
    Returns None.
    """
    global history
    urls = [url1, url2]
    previous_hashes = ["", ""]

    # Ensure the target directory exists before opening the CSV.
    os.makedirs(os.path.dirname(storage_location), exist_ok=True)

    with open(storage_location, "w", newline='') as csvfile:
        csv_toolkit = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
        csv_toolkit.writeheader()

        options = Options()
        # Selenium 4 removed the `options.headless` attribute setter's effect;
        # pass the Chrome flag explicitly instead.
        options.add_argument("--headless")
        options.add_argument("--disable-gpu")
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")

        with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options) as driver:
            try:
                while True:
                    for i, url in enumerate(urls):
                        try:
                            driver.get(url)
                            time.sleep(2)  # Wait for the page to load
                            if content_type == "media":
                                # Hash the image *sources*, not the WebElement objects:
                                # str(WebElement) embeds a per-session element id, so the
                                # original code reported a "change" on every single poll.
                                # find_elements_by_tag_name was removed in Selenium 4.
                                images = driver.find_elements(By.TAG_NAME, "img")
                                current_content = "".join(img.get_attribute("src") or "" for img in images)
                            else:
                                # "text" and any unrecognized content_type fall back to
                                # the full page source (same as the original's branches).
                                current_content = driver.page_source

                            current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()

                            if current_hash != previous_hashes[i]:
                                previous_hashes[i] = current_hash
                                date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                                history.append(f"Change detected at {url} on {date_time_str}")
                                csv_toolkit.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
                                # Flush each row: this loop never exits normally, so
                                # buffered rows would be lost on an abrupt stop.
                                csvfile.flush()
                                logging.info(f"Change detected at {url} on {date_time_str}")
                        except Exception as e:
                            logging.error(f"Error accessing {url}: {e}")

                    time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
            except KeyboardInterrupt:
                logging.info("Monitoring stopped by user.")
            finally:
                driver.quit()
# Define main function to handle user input
def handle_input(storage_location, url1, url2, scrape_interval, content_type):
    """Record the requested monitoring task, run the monitor loop, and return
    a TASK_PROMPT summary of the task plus the accumulated history.

    Note: monitor_urls blocks indefinitely, so the return value is only
    produced after monitoring stops (e.g. KeyboardInterrupt).
    """
    global current_task, history

    task_label = f"Monitoring URLs: {url1}, {url2}"
    current_task = task_label
    history.append(f"Task started: {task_label}")

    monitor_urls(storage_location, url1, url2, scrape_interval, content_type)

    joined_history = "\n".join(history)
    return TASK_PROMPT.format(task=current_task, history=joined_history)
101
# Define the chat response function
# Module-level Hugging Face Inference API client used by respond();
# constructed once at import time for the zephyr-7b-beta hosted model.
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    # gr.ChatInterface forwards *every* additional_inputs value positionally;
    # the interface below declares more inputs than the four above, which
    # previously raised TypeError on submit. Extra values are accepted and
    # ignored here (backward compatible for all existing callers).
    *_unused_inputs,
):
    """Stream a chat completion for `message` given the prior `history`.

    Args:
        message: Latest user message.
        history: Prior (user, assistant) turn pairs; empty strings are skipped.
        system_message: System prompt placed first in the message list.
        max_tokens, temperature, top_p: Sampling parameters passed through.

    Yields:
        The progressively accumulated assistant response after each chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # Use a distinct loop variable: the original reused `message`, shadowing
    # the user's message with each streamed chunk object.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # Streamed deltas may carry content=None (e.g. role-only chunks);
        # `response += None` raised TypeError in the original.
        if token is not None:
            response += token
        yield response
# Create Gradio interface
# NOTE(review): ChatInterface forwards every additional_inputs value to the fn
# positionally after (message, history). Nine inputs are declared here, but the
# chat fn's signature names only four extras (system message, max tokens,
# temperature, top-p); the five monitoring fields below are never consumed and
# handle_input is not wired to the UI at all — confirm intent and either wire
# them up or drop them.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Textbox(value=default_file_path, label="Storage Location"),
        gr.Textbox(value="https://www.culver.k12.in.us/", label="URL 1"),
        gr.Textbox(value="https://www.facebook.com/CulverCommunitySchools", label="URL 2"),
        gr.Slider(minimum=1, maximum=60, value=5, step=1, label="Scrape Interval (minutes)"),
        gr.Radio(choices=["text", "media", "both"], value="text", label="Content Type"),
    ],
    title="Culvers Site Monitor and Chatbot",
    description="Monitor changes on Culvers' websites and log them into a CSV file. Also, chat with a friendly chatbot."
)

# Launch the Gradio app only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()