Spaces:
Runtime error
Runtime error
acecalisto3
committed on
Commit
•
575405c
1
Parent(s):
1583001
Update agent.py
Browse files
agent.py
CHANGED
@@ -1,5 +1,3 @@
|
|
1 |
-
# agent.py
|
2 |
-
|
3 |
import os
|
4 |
import time
|
5 |
import hashlib
|
@@ -57,7 +55,7 @@ def monitor_urls(storage_location, urls, scrape_interval, content_type, selector
|
|
57 |
current_content = driver.page_source
|
58 |
|
59 |
current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
|
60 |
-
if current_hash!= previous_hashes[url]:
|
61 |
previous_hashes[url] = current_hash
|
62 |
date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
63 |
HISTORY.append(f"Change detected at {url} on {date_time_str}")
|
@@ -71,14 +69,6 @@ def monitor_urls(storage_location, urls, scrape_interval, content_type, selector
|
|
71 |
except Exception as e:
|
72 |
logging.error(f"Error starting ChromeDriver: {e}")
|
73 |
|
74 |
-
# Function to define the chat response function using the Mistral model
|
75 |
-
def respond(message, history, system_message, max_tokens, temperature, top_p):
|
76 |
-
model = AutoModelForSeq2SeqLM.from_pretrained_model("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
77 |
-
tokenizer = AutoTokenizer.from_pretrained_model("mistralai/Mixtral-8x7B-Instruct-v0.1")
|
78 |
-
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
79 |
-
response = pipe(f"User: {message}\nHistory: {history}\nSystem: {system_message}", max_length=max_tokens, temperature=temperature, top_p=top_p)[0]
|
80 |
-
return response
|
81 |
-
|
82 |
# Function to start scraping
|
83 |
def start_scraping(storage_location, urls, scrape_interval, content_type, selector=None):
|
84 |
global CURRENT_TASK, HISTORY
|
@@ -161,8 +151,59 @@ def generate_rss_feed(storage_location, url):
|
|
161 |
else:
|
162 |
return "No data available."
|
163 |
|
164 |
-
# Function to
|
165 |
-
def
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import time
|
3 |
import hashlib
|
|
|
55 |
current_content = driver.page_source
|
56 |
|
57 |
current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
|
58 |
+
if current_hash != previous_hashes[url]:
|
59 |
previous_hashes[url] = current_hash
|
60 |
date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
61 |
HISTORY.append(f"Change detected at {url} on {date_time_str}")
|
|
|
69 |
except Exception as e:
|
70 |
logging.error(f"Error starting ChromeDriver: {e}")
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
# Function to start scraping
|
73 |
def start_scraping(storage_location, urls, scrape_interval, content_type, selector=None):
|
74 |
global CURRENT_TASK, HISTORY
|
|
|
151 |
else:
|
152 |
return "No data available."
|
153 |
|
154 |
+
# Function to define the chat response function using the Mistral model
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a chat reply with the Mixtral instruct model.

    Args:
        message: The user's current message.
        history: Prior conversation turns (interpolated into the prompt as-is).
        system_message: System prompt injected into the prompt text.
        max_tokens: Maximum number of NEW tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The generated text as a plain string.
    """
    # Load the model/tokenizer once and cache the pipeline on the function
    # object; the original rebuilt everything on every message, which is
    # prohibitively slow for a multi-gigabyte checkpoint.
    if not hasattr(respond, "_pipe"):
        # BUG FIX: the HF auto classes expose `from_pretrained`, not
        # `from_pretrained_model` (the original raised AttributeError).
        # Mixtral is a decoder-only (causal) model, so AutoModelForSeq2SeqLM
        # cannot load it and a "text-generation" pipeline would reject it.
        from transformers import AutoModelForCausalLM  # transformers is already a file dependency
        model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
        tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
        respond._pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
    prompt = f"User: {message}\nHistory: {history}\nSystem: {system_message}"
    # `max_new_tokens` bounds only the continuation; `max_length` counts the
    # prompt too and can truncate generation to nothing on long histories.
    # do_sample=True is required for temperature/top_p to take effect.
    result = respond._pipe(prompt, max_new_tokens=max_tokens, temperature=temperature, top_p=top_p, do_sample=True)[0]
    # The pipeline returns [{'generated_text': ...}]; return the text itself,
    # not the result dict, so callers receive a displayable string.
    return result["generated_text"]
|
161 |
+
|
162 |
+
# Define the Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI for the scraper + chat app.

    Wires the scraping controls to start_scraping/stop_scraping, the chat box
    to respond, and adds CSV/RSS inspection rows for a chosen URL.

    Returns:
        The constructed gr.Blocks demo (not yet launched).
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                message = gr.Textbox(label="Message")
                system_message = gr.Textbox(value="You are a helpful assistant.", label="System message")
                max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
                temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
                storage_location = gr.Textbox(value="scraped_data", label="Storage Location")
                urls = gr.Textbox(label="URLs (comma separated)")
                scrape_interval = gr.Slider(minimum=1, maximum=60, value=5, step=1, label="Scrape Interval (minutes)")
                content_type = gr.Radio(choices=["text", "media", "both"], value="text", label="Content Type")
                start_button = gr.Button("Start Scraping")
                stop_button = gr.Button("Stop Scraping")
                csv_output = gr.Textbox(label="CSV Output", interactive=False)

            with gr.Column():
                chat_history = gr.Chatbot(label="Chat History")
                response_box = gr.Textbox(label="Response")

        stop_scraping_flag = [False]
        start_button.click(start_scraping, inputs=[storage_location, urls, scrape_interval, content_type], outputs=csv_output)
        # BUG FIX: a plain Python list is not a Gradio component and cannot be
        # passed via inputs=. Close over the flag instead so stop_scraping
        # still receives it. NOTE(review): confirm stop_scraping's signature
        # (defined elsewhere in this file) accepts the flag list.
        stop_button.click(lambda: stop_scraping(stop_scraping_flag), inputs=None, outputs=[csv_output])

        def _on_message(msg, history, sys_msg, max_t, temp, tp):
            # BUG FIX: respond returns a single string, but this event has two
            # outputs. Append the turn to the chat history and also surface the
            # raw reply in the response box.
            reply = respond(msg, history, sys_msg, max_t, temp, tp)
            updated_history = (history or []) + [(msg, reply)]
            return updated_history, reply

        message.submit(_on_message, inputs=[message, chat_history, system_message, max_tokens, temperature, top_p], outputs=[chat_history, response_box])

        # Add a button to display the CSV content for a selected URL.
        # Renamed from `selected_url`/`csv_output` to avoid shadowing the
        # scraper-output textbox above (the rebinding was harmless only by
        # accident of registration order).
        with gr.Row():
            csv_url = gr.Textbox(label="Select URL for CSV Content")
            csv_button = gr.Button("Display CSV Content")
            csv_content_output = gr.Textbox(label="CSV Content Output", interactive=False)

        csv_button.click(display_csv, inputs=[storage_location, csv_url], outputs=csv_content_output)

        # Add a button to display the RSS feed for a selected URL.
        with gr.Row():
            rss_url = gr.Textbox(label="Select URL for RSS Feed")
            rss_button = gr.Button("Generate RSS Feed")
            rss_output = gr.Textbox(label="RSS Feed Output", interactive=False)

        rss_button.click(generate_rss_feed, inputs=[storage_location, rss_url], outputs=rss_output)

    return demo
|
206 |
+
|
207 |
+
# Script entry point: build the UI and serve it.
if __name__ == "__main__":
    create_interface().launch()
|