acecalisto3 committed
Commit e8560ad
1 Parent(s): e051370

Update app.py

Files changed (1)
  1. app.py +132 -168

app.py CHANGED
@@ -1,227 +1,191 @@
-import datetime
 import os
-import csv
 import time
 import hashlib
 import logging
-import gradio as gr
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.chrome.options import Options
-from webdriver_manager.chrome import ChromeDriverManager
-from huggingface_hub import InferenceClient
-import random
-import yaml
-import requests
-from bs4 import BeautifulSoup
-
-urls = [
-    "https://twitter.com/wlcscrdp",
-    "https://www.facebook.com/aurorareddevils/",
-    "https://www.facebook.com/brightpanthers/",
-    "https://www.facebook.com/carrollcountychamberin/",
-    "https://www.facebook.com/Culver.Cavs.MHS",
-    "https://www.facebook.com/culver.elementary.school",
-    "https://www.facebook.com/CulverCommunitySchools",
-    "https://www.facebook.com/DillsboroBulldogs/",
-    "https://www.facebook.com/ECMSTROJANS",
-    "https://www.facebook.com/enjoywhitecountyIN/",
-    "https://www.facebook.com/farmersvilleelementary",
-    "https://www.facebook.com/groups/SDMSparents",
-    "https://www.facebook.com/jghsart/",
-    "https://www.facebook.com/jgmusicdept",
-    "https://www.facebook.com/John-Glenn-Education-Foundation-208326199636364/",
-    "https://www.facebook.com/John-Glenn-High-School-1102148953201006/",
-    "https://www.facebook.com/John-Glenn-Theatre-Company-383638295064502/",
-    "https://www.facebook.com/JohnGlennFalconsAthletics",
-    "https://www.facebook.com/KIRPC-Head-Start-1485812354989001",
-    "https://www.facebook.com/KIRPC1",
-    "https://www.facebook.com/LHNEeagles",
-    "https://www.facebook.com/LuceElementarySchool/",
-    "https://www.facebook.com/marrselementary",
-    "https://www.facebook.com/messhiners/",
-    "https://www.facebook.com/monticellocitypool",
-    "https://www.facebook.com/monticelloinwastewater/",
-    "https://www.facebook.com/MooresHillBobcats/",
-    "https://www.facebook.com/msdmv",
-    "https://www.facebook.com/msdnorthposey",
-    "https://www.facebook.com/MUTPL/",
-    "https://www.facebook.com/MVJHS/",
-    "https://www.facebook.com/mvshs",
-    "https://www.facebook.com/njspjrsrhighschool?mibextid=b06tZ0",
-    "https://www.facebook.com/NorthElementaryStars/",
-    "https://www.facebook.com/NorthLibertyElementary/",
-    "https://www.facebook.com/northposey/",
-    "https://www.facebook.com/northposeyhs/",
-    "https://www.facebook.com/NPJuniorHigh",
-    "https://www.facebook.com/Prairie-Heights-Elementary-659322230934707/",
-    "https://www.facebook.com/Prairie-Heights-High-School-2027713067459043/",
-    "https://www.facebook.com/PrairieHeightsPanthers/",
-    "https://www.facebook.com/profile.php?id=100057030237096",
-    "https://www.facebook.com/profile.php?id=100057451179651",
-    "https://www.facebook.com/profile.php?id=100063463513451",
-    "https://www.facebook.com/profile.php?id=100063612319256",
-    "https://www.facebook.com/profile.php?id=100064532596422",
-    "https://www.facebook.com/profile.php?id=100067180226810",
-    "https://www.facebook.com/profile.php?id=61563484312348",
-    "https://www.facebook.com/PTOSWES/",
-    "https://www.facebook.com/RandolphSouthern/",
-    "https://www.facebook.com/RochesterMiddleSchool",
-    "https://www.facebook.com/RochesterZebraNewTechHigh",
-    "https://www.facebook.com/rockportelementarysouthspencer/",
-    "https://www.facebook.com/satellitesathletics/",
-    "https://www.facebook.com/seymourcommunityschools/",
-    "https://www.facebook.com/SeymourHighSchool/",
-    "https://www.facebook.com/SouthDearbornHighSchool/",
-    "https://www.facebook.com/southarbornschools/",
-    "https://www.facebook.com/SouthDearbornSquires/",
-    "https://www.facebook.com/southspencerhighschool",
-    "https://www.facebook.com/southspencermiddleschool/",
-    "https://www.facebook.com/SouthSpencerSchools",
-    "https://www.facebook.com/SouthTerracePanthers/",
-    "https://www.facebook.com/sunmantigers/",
-    "https://www.facebook.com/SWShelbySpartan/",
-    "https://www.facebook.com/TallTimbersMarina",
-    "https://www.facebook.com/WabashValleyESC/",
-    "https://www.facebook.com/Walkerton-Elementary-School-283088605088622/",
-    "https://www.facebook.com/westcentralcte/",
-    "https://www.facebook.com/westelementary",
-    "https://www.facebook.com/wlcscrdp",
-    "https://www.instagram.com/mutpl/",
-    "https://www.instagram.com/northposeyhsathletics",
-    "https://www.instagram.com/rchsprincipalcook/",
-    "https://www.instagram.com/southdearbornhighschool/",
-    "https://www.instagram.com/southdearbornschools/",
-    "https://www.instagram.com/westcentralcte/",
-    "https://www.tiktok.com/@mutplteen"
-]
-
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 # Define constants
-DATE_TIME_STR = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
 PURPOSE = f"You go to Culvers sites, you continuously seek changes on them since your last observation. Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data."
 HISTORY = []
 CURRENT_TASK = None
-DEFAULT_FILE_PATH = "user/app/scraped_data/culver/culvers_changes.csv"
 
-# Ensure the directory exists
-os.makedirs(os.path.dirname(DEFAULT_FILE_PATH), exist_ok=True)
 
 # Function to monitor URLs for changes
 def monitor_urls(storage_location, urls, scrape_interval, content_type):
     global HISTORY
-    previous_hashes = [""] * len(urls)
 
     try:
-        with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=Options()) as driver:
             while True:
-                for i, url in enumerate(urls):
                     try:
                         driver.get(url)
                         time.sleep(2)  # Wait for the page to load
                         if content_type == "text":
                             current_content = driver.page_source
                         elif content_type == "media":
-                            current_content = driver.find_elements_by_tag_name("img")
                         else:
                             current_content = driver.page_source
                         current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
-                        if current_hash != previous_hashes[i]:
-                            previous_hashes[i] = current_hash
                             date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                             HISTORY.append(f"Change detected at {url} on {date_time_str}")
-                            with open(storage_location, "a", newline="") as csvfile:
                                 csv_writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
                                 csv_writer.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
                             logging.info(f"Change detected at {url} on {date_time_str}")
-                    except Exception as e:
                         logging.error(f"Error accessing {url}: {e}")
                 time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
     except Exception as e:
         logging.error(f"Error starting ChromeDriver: {e}")
 
-# Define main function to handle user input
-def handle_input(storage_location, urls, scrape_interval, content_type):
     global CURRENT_TASK, HISTORY
 
     CURRENT_TASK = f"Monitoring URLs: {', '.join(urls)}"
     HISTORY.append(f"Task started: {CURRENT_TASK}")
-    monitor_urls(storage_location, urls, scrape_interval, content_type)
-    return TASK_PROMPT.format(task=CURRENT_TASK, history="\n".join(map(str, HISTORY)))
-
-# Load custom prompts
-try:
-    with open("custom_prompts.yaml", "r") as fp:
-        custom_prompts = yaml.safe_load(fp)
-except FileNotFoundError:
-    custom_prompts = {"WEB_DEV": "", "AI_SYSTEM_PROMPT": "", "PYTHON_CODE_DEV": "", "CODE_GENERATION": "", "CODE_INTERPRETATION": "", "CODE_TRANSLATION": "", "CODE_IMPLEMENTATION": ""}
 
-# Define agents
-AGENTS = ["WEB_DEV", "AI_SYSTEM_PROMPT", "PYTHON_CODE_DEV", "CODE_GENERATION", "CODE_INTERPRETATION", "CODE_TRANSLATION", "CODE_IMPLEMENTATION"]
-
-# Define the Mistral inference client
-client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-
-# Define the chat response function
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    return generate(message, history, system_message, max_tokens, temperature, top_p)
 
-# Function to start scraping
-def start_scraping(storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
-    urls = [url for url in [url1, url2, url3, url4, url5, url6, url7, url8, url9, url10] if url]
-    handle_input(storage_location, urls, scrape_interval, content_type)
     return f"Started scraping {', '.join(urls)} every {scrape_interval} minutes."
 
 # Function to display CSV content
-def display_csv(storage_location):
-    if os.path.exists(storage_location):
-        with open(storage_location, "r") as file:
             return file.read()
     else:
         return "No data available."
 
 # Create Gradio interface
-def chat_interface(message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type):
-    global HISTORY
-    response = respond(message, HISTORY, system_message, max_tokens, temperature, top_p)
-    HISTORY.append((message, response))
-    return HISTORY, ""
-
-demo = gr.Blocks()
-
-with demo:
-    with gr.Row():
-        with gr.Column():
-            message = gr.Textbox(label="Message")
-            system_message = gr.Textbox(value="You are a friendly Chatbot.", label="System message")
-            max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
-            temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
-            top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
-            storage_location = gr.Textbox(value=DEFAULT_FILE_PATH, label="Storage Location")
-            url1 = gr.Textbox(value="https://www.culver.k12.in/", label="URL 1")
-            url2 = gr.Textbox(value="https://www.facebook.com/CulverCommunitySchools", label="URL 2")
-            url3 = gr.Textbox(label="URL 3")
-            url4 = gr.Textbox(label="URL 4")
-            url5 = gr.Textbox(label="URL 5")
-            url6 = gr.Textbox(label="URL 6")
-            url7 = gr.Textbox(label="URL 7")
-            url8 = gr.Textbox(label="URL 8")
-            url9 = gr.Textbox(label="URL 9")
-            url10 = gr.Textbox(label="URL 10")
-            scrape_interval = gr.Slider(minimum=1, maximum=60, value=5, step=1, label="Scrape Interval (minutes)")
-            content_type = gr.Radio(choices=["text", "media", "both"], value="text", label="Content Type")
-            start_button = gr.Button("Start Scraping")
-            csv_output = gr.Textbox(label="CSV Output", interactive=False)
-
-        with gr.Column():
-            chat_history = gr.Chatbot(label="Chat History")
-            response_box = gr.Textbox(label="Response")
-
-    start_button.click(start_scraping, inputs=[storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type], outputs=csv_output)
-    message.submit(chat_interface, inputs=[message, system_message, max_tokens, temperature, top_p, storage_location, url1, url2, url3, url4, url5, url6, url7, url8, url9, url10, scrape_interval, content_type], outputs=[chat_history, response_box])
 
 if __name__ == "__main__":
     demo.launch()
 
 import os
 import time
 import hashlib
 import logging
+import datetime
+import csv
+import threading
+from urllib.parse import urlparse
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
 from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from webdriver_manager.chrome import ChromeDriverManager
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import gradio as gr
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 # Define constants
+DEFAULT_FILE_PATH = "scraped_data"
 PURPOSE = f"You go to Culvers sites, you continuously seek changes on them since your last observation. Anything new that gets logged and dumped into csv, stored in your log folder at user/app/scraped_data."
 HISTORY = []
 CURRENT_TASK = None
 
+# Define the list of URLs to monitor (you can add more URLs here)
+URLS_TO_MONITOR = [
+    "https://twitter.com/wlcscrdp",
+    "https://www.facebook.com/aurorareddevils/",
+    "https://www.facebook.com/brightpanthers/",
+    "https://www.facebook.com/carrollcountychamberin/",
+    "https://www.facebook.com/Culver.Cavs.MHS",
+    "https://www.facebook.com/culver.elementary.school",
+    "https://www.facebook.com/CulverCommunitySchools",
+    "https://www.facebook.com/DillsboroBulldogs/",
+    "https://www.facebook.com/ECMSTROJANS",
+    "https://www.facebook.com/enjoywhitecountyIN/",
+    "https://www.facebook.com/farmersvilleelementary",
+    "https://www.facebook.com/groups/SDMSparents",
+    "https://www.facebook.com/jghsart/",
+    "https://www.facebook.com/jgmusicdept",
+    "https://www.facebook.com/John-Glenn-Education-Foundation-208326199636364/",
+    "https://www.facebook.com/John-Glenn-High-School-1102148953201006/",
+    "https://www.facebook.com/John-Glenn-Theatre-Company-383638295064502/",
+    "https://www.facebook.com/JohnGlennFalconsAthletics",
+    "https://www.facebook.com/KIRPC-Head-Start-1485812354989001",
+    "https://www.facebook.com/KIRPC1",
+    "https://www.facebook.com/LHNEeagles",
+    "https://www.facebook.com/LuceElementarySchool/",
+    "https://www.facebook.com/marrselementary",
+    "https://www.facebook.com/messhiners/",
+    "https://www.facebook.com/monticellocitypool",
+    "https://www.facebook.com/monticelloinwastewater/",
+    "https://www.facebook.com/MooresHillBobcats/",
+    "https://www.facebook.com/msdmv",
+    "https://www.facebook.com/msdnorthposey",
+    "https://www.facebook.com/MUTPL/",
+    "https://www.facebook.com/MVJHS/",
+    "https://www.facebook.com/mvshs",
+    "https://www.facebook.com/njspjrsrhighschool?mibextid=b06tZ0",
+    "https://www.facebook.com/NorthElementaryStars/",
+    "https://www.facebook.com/NorthLibertyElementary/",
+    "https://www.facebook.com/northposey/",
+    "https://www.facebook.com/northposeyhs/",
+    "https://www.facebook.com/NPJuniorHigh",
+    "https://www.facebook.com/Prairie-Heights-Elementary-659322230934707/",
+    "https://www.facebook.com/Prairie-Heights-High-School-2027713067459043/",
+    "https://www.facebook.com/PrairieHeightsPanthers/",
+    "https://www.facebook.com/profile.php?id=100057030237096",
+    "https://www.facebook.com/profile.php?id=100057451179651",
+    "https://www.facebook.com/profile.php?id=100063463513451",
+    "https://www.facebook.com/profile.php?id=100063612319256",
+    "https://www.facebook.com/profile.php?id=100064532596422",
+    "https://www.facebook.com/profile.php?id=100067180226810",
+    "https://www.facebook.com/profile.php?id=61563484312348",
+    "https://www.facebook.com/PTOSWES/",
+    "https://www.facebook.com/RandolphSouthern/",
+    "https://www.facebook.com/RochesterMiddleSchool",
+    "https://www.facebook.com/RochesterZebraNewTechHigh",
+    "https://www.facebook.com/rockportelementarysouthspencer/",
+    "https://www.facebook.com/satellitesathletics/",
+    "https://www.facebook.com/seymourcommunityschools/",
+    "https://www.facebook.com/SeymourHighSchool/",
+    "https://www.facebook.com/SouthDearbornHighSchool/",
+    "https://www.facebook.com/southarbornschools/",
+    "https://www.facebook.com/SouthDearbornSquires/",
+    "https://www.facebook.com/southspencerhighschool",
+    "https://www.facebook.com/southspencermiddleschool/",
+    "https://www.facebook.com/SouthSpencerSchools",
+    "https://www.facebook.com/SouthTerracePanthers/",
+    "https://www.facebook.com/sunmantigers/",
+    "https://www.facebook.com/SWShelbySpartan/",
+    "https://www.facebook.com/TallTimbersMarina",
+    "https://www.facebook.com/WabashValleyESC/",
+    "https://www.facebook.com/Walkerton-Elementary-School-283088605088622/",
+    "https://www.facebook.com/westcentralcte/",
+    "https://www.facebook.com/westelementary",
+    "https://www.facebook.com/wlcscrdp",
+    "https://www.instagram.com/mutpl/",
+    "https://www.instagram.com/northposeyhsathletics",
+    "https://www.instagram.com/rchsprincipalcook/",
+    "https://www.instagram.com/southdearbornhighschool/",
+    "https://www.instagram.com/southdearbornschools/",
+    "https://www.instagram.com/westcentralcte/",
+    "https://www.tiktok.com/@mutplteen",
+]
 
 # Function to monitor URLs for changes
 def monitor_urls(storage_location, urls, scrape_interval, content_type):
     global HISTORY
+    previous_hashes = {url: "" for url in urls}  # Use a dictionary for better organization
 
     try:
+        with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=Options()) as driver:
             while True:
+                for url in urls:
                     try:
                         driver.get(url)
                         time.sleep(2)  # Wait for the page to load
                         if content_type == "text":
                             current_content = driver.page_source
                         elif content_type == "media":
+                            current_content = driver.find_elements(By.TAG_NAME, "img")
                         else:
                             current_content = driver.page_source
                         current_hash = hashlib.md5(str(current_content).encode('utf-8')).hexdigest()
+                        if current_hash != previous_hashes[url]:
+                            previous_hashes[url] = current_hash
                             date_time_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                             HISTORY.append(f"Change detected at {url} on {date_time_str}")
+                            hostname = urlparse(url).hostname
+                            csv_path = os.path.join(storage_location, hostname, f"{hostname}_changes.csv")
+                            os.makedirs(os.path.dirname(csv_path), exist_ok=True)
+                            write_header = not os.path.exists(csv_path)
+                            with open(csv_path, "a", newline="") as csvfile:
                                 csv_writer = csv.DictWriter(csvfile, fieldnames=["date", "time", "url", "change"])
+                                if write_header:
+                                    csv_writer.writeheader()  # DictReader needs a header row when the file is read back
                                 csv_writer.writerow({"date": date_time_str.split()[0], "time": date_time_str.split()[1], "url": url, "change": "Content changed"})
                             logging.info(f"Change detected at {url} on {date_time_str}")
+                    except Exception as e:
                         logging.error(f"Error accessing {url}: {e}")
                 time.sleep(scrape_interval * 60)  # Check every scrape_interval minutes
     except Exception as e:
         logging.error(f"Error starting ChromeDriver: {e}")
 
+# Function to start scraping
+def start_scraping(storage_location, urls, scrape_interval, content_type):
     global CURRENT_TASK, HISTORY
 
+    # The Gradio textbox passes the URLs as a single comma-separated string
+    if isinstance(urls, str):
+        urls = [u.strip() for u in urls.split(",") if u.strip()]
+
     CURRENT_TASK = f"Monitoring URLs: {', '.join(urls)}"
     HISTORY.append(f"Task started: {CURRENT_TASK}")
 
+    for url in urls:
+        # Create a folder for the URL
+        hostname = urlparse(url).hostname
+        folder_path = os.path.join(storage_location, hostname)
+        os.makedirs(folder_path, exist_ok=True)
+
+        # Log the initial observation
+        try:
+            with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=Options()) as driver:
+                driver.get(url)
+                time.sleep(2)  # Wait for the page to load
+                if content_type == "text":
+                    initial_content = driver.page_source
+                elif content_type == "media":
+                    initial_content = driver.find_elements(By.TAG_NAME, "img")
+                else:
+                    initial_content = driver.page_source
+                initial_hash = hashlib.md5(str(initial_content).encode('utf-8')).hexdigest()
+                HISTORY.append(f"Initial observation at {url}: {initial_hash}")
+                with open(os.path.join(folder_path, f"{hostname}_initial_observation.txt"), "w") as file:
+                    file.write(f"Initial observation at {url}: {initial_hash}")
+        except Exception as e:
+            HISTORY.append(f"Error accessing {url}: {e}")
+
+    # Monitor the URLs in a background thread: monitor_urls loops forever,
+    # so calling it inline would never reach the return statement below
+    threading.Thread(target=monitor_urls, args=(storage_location, urls, scrape_interval, content_type), daemon=True).start()
 
     return f"Started scraping {', '.join(urls)} every {scrape_interval} minutes."
 
 # Function to display CSV content
+def display_csv(url):
+    hostname = urlparse(url).hostname
+    folder_path = os.path.join(DEFAULT_FILE_PATH, hostname)
+    csv_path = os.path.join(folder_path, f"{hostname}_changes.csv")
+    if os.path.exists(csv_path):
+        with open(csv_path, "r") as file:
             return file.read()
     else:
         return "No data available."
 
+# Define the chat response function using the Mistral model
+_chat_pipe = None  # Cache the pipeline; reloading the model on every call would be prohibitively slow
+
+def respond(message, history, system_message, max_tokens, temperature, top_p):
+    global _chat_pipe
+    if _chat_pipe is None:
+        tokenizer = AutoTokenizer.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
+        model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-Instruct-v0.1")
+        _chat_pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+    output = _chat_pipe(f"User: {message}\nHistory: {history}\nSystem: {system_message}", max_new_tokens=max_tokens, temperature=temperature, top_p=top_p)
+    return output[0]["generated_text"]
+
+# Function to generate RSS-style feed entries for a given URL
+def generate_rss_feed(url):
+    hostname = urlparse(url).hostname
+    folder_path = os.path.join(DEFAULT_FILE_PATH, hostname)
+    csv_path = os.path.join(folder_path, f"{hostname}_changes.csv")
+    if os.path.exists(csv_path):
+        with open(csv_path, "r") as file:
+            reader = csv.DictReader(file)
+            # feedparser only parses existing feeds, so assemble the feed as plain dictionaries
+            feed = {
+                "title": f"Changes for {hostname}",
+                "link": url,
+                "description": "Recent changes detected on the website.",
+                "entries": [],
+            }
+            for row in reader:
+                feed["entries"].append({
+                    "title": f"Change detected at {row['url']}",
+                    "link": row['url'],
+                    "description": f"Content changed on {row['date']} at {row['time']}",
+                    "published": datetime.datetime.strptime(f"{row['date']} {row['time']}", "%Y-%m-%d %H:%M:%S").isoformat(),
+                })
+            return feed["entries"]
+    else:
+        return "No data available."
+
+# Function to handle user input and generate response
+def chat_interface(message, history, system_message, max_tokens, temperature, top_p, storage_location, urls, scrape_interval, content_type):
+    response = respond(message, history, system_message, max_tokens, temperature, top_p)
+    history.append((message, response))
+    return history, response
+
 # Create Gradio interface
+def create_interface():
+    with gr.Blocks() as demo:
+        with gr.Row():
+            with gr.Column():
+                message = gr.Textbox(label="Message")
+                system_message = gr.Textbox(value="You are a helpful assistant.", label="System message")
+                max_tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
+                temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
+                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
+                storage_location = gr.Textbox(value="scraped_data", label="Storage Location")
+                urls = gr.Textbox(label="URLs (comma separated)")
+                scrape_interval = gr.Slider(minimum=1, maximum=60, value=5, step=1, label="Scrape Interval (minutes)")
+                content_type = gr.Radio(choices=["text", "media", "both"], value="text", label="Content Type")
+                start_button = gr.Button("Start Scraping")
+                csv_output = gr.Textbox(label="CSV Output", interactive=False)
+
+            with gr.Column():
+                chat_history = gr.Chatbot(label="Chat History")
+                response_box = gr.Textbox(label="Response")
+
+        start_button.click(start_scraping, inputs=[storage_location, urls, scrape_interval, content_type], outputs=csv_output)
+        message.submit(chat_interface, inputs=[message, chat_history, system_message, max_tokens, temperature, top_p, storage_location, urls, scrape_interval, content_type], outputs=[chat_history, response_box])
+
+        # Add a button to display the RSS feed for a selected URL
+        with gr.Row():
+            selected_url = gr.Textbox(label="Select URL for RSS Feed")
+            rss_button = gr.Button("Generate RSS Feed")
+            rss_output = gr.Textbox(label="RSS Feed Output", interactive=False)
+
+        rss_button.click(generate_rss_feed, inputs=[selected_url], outputs=rss_output)
+
+    return demo
 
 if __name__ == "__main__":
+    demo = create_interface()
     demo.launch()
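
For reference, the change-detection core that both versions of monitor_urls share reduces to hashing each page snapshot and comparing it against the previous poll. Below is a minimal sketch of that idea, decoupled from Selenium and Gradio; the has_changed helper and the plain-urllib transport are illustrative assumptions, not part of this commit:

import hashlib
import urllib.request

previous_hashes = {}  # url -> hash of the last snapshot seen

def has_changed(url):
    # Hash the current snapshot and compare it with the previous poll,
    # mirroring the md5 comparison inside monitor_urls
    with urllib.request.urlopen(url, timeout=10) as resp:
        current_hash = hashlib.md5(resp.read()).hexdigest()
    previous = previous_hashes.get(url)
    previous_hashes[url] = current_hash
    return previous is not None and previous != current_hash

Note that most of the Facebook and Instagram pages in URLS_TO_MONITOR will not serve full content to a bare HTTP client, which is why the app drives a real Chrome session through Selenium instead.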