KingNish committed
Commit a001ae3
1 Parent(s): 9ce79a3

Update app.py

Files changed (1): app.py (+98 -5)
app.py CHANGED
@@ -9,6 +9,89 @@ import torch
 import sentencepiece as spm
 import onnxruntime as ort
 from huggingface_hub import hf_hub_download, InferenceClient
+import requests
+from bs4 import BeautifulSoup
+import urllib
+import random
+
+# List of user agents to choose from for requests
+_useragent_list = [
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0',
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
+    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36',
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62',
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0'
+]
+
+def get_useragent():
+    """Returns a random user agent from the list."""
+    return random.choice(_useragent_list)
+
+def extract_text_from_webpage(html_content):
+    """Extracts visible text from HTML content using BeautifulSoup."""
+    soup = BeautifulSoup(html_content, "html.parser")
+    # Remove unwanted tags
+    for tag in soup(["script", "style", "header", "footer", "nav"]):
+        tag.extract()
+    # Get the remaining visible text
+    visible_text = soup.get_text(strip=True)
+    return visible_text
+
+def search(term, num_results=3, lang="en", advanced=True, sleep_interval=0, timeout=5, safe="active", ssl_verify=None):
+    """Performs a Google search and returns the results."""
+    escaped_term = urllib.parse.quote_plus(term)
+    start = 0
+    all_results = []
+
+    # Fetch results in batches
+    while start < num_results:
+        resp = requests.get(
+            url="https://www.google.com/search",
+            headers={"User-Agent": get_useragent()},  # Set random user agent
+            params={
+                "q": term,
+                "num": num_results - start,  # Number of results to fetch in this batch
+                "hl": lang,
+                "start": start,
+                "safe": safe,
+            },
+            timeout=timeout,
+            verify=ssl_verify,
+        )
+        resp.raise_for_status()  # Raise an exception if request fails
+
+        soup = BeautifulSoup(resp.text, "html.parser")
+        result_block = soup.find_all("div", attrs={"class": "g"})
+
+        # If no results, continue to the next batch
+        if not result_block:
+            start += 1
+            continue
+
+        # Extract link and text from each result
+        for result in result_block:
+            link = result.find("a", href=True)
+            if link:
+                link = link["href"]
+                try:
+                    # Fetch webpage content
+                    webpage = requests.get(link, headers={"User-Agent": get_useragent()})
+                    webpage.raise_for_status()
+                    # Extract visible text from webpage
+                    visible_text = extract_text_from_webpage(webpage.text)
+                    all_results.append({"link": link, "text": visible_text})
+                except requests.exceptions.RequestException as e:
+                    # Handle errors fetching or processing webpage
+                    print(f"Error fetching or processing {link}: {e}")
+                    all_results.append({"link": link, "text": None})
+            else:
+                all_results.append({"link": None, "text": None})
+
+        start += len(result_block)  # Update starting index for next batch
+
+    return all_results
 
 # Speech Recognition Model Configuration
 model_name = "neongeckocom/stt_en_citrinet_512_gamma_0_25"
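(Editor's note, not part of the commit: a minimal sketch of how the new search() helper behaves, assuming the functions above are in scope; the query string is invented for illustration.)

# Illustrative sketch only -- not in the commit. Assumes search() and its helpers
# from app.py are in scope; the query string is made up.
results = search("current weather in London", num_results=2)
for res in results:
    # Each entry is a dict with "link" and "text"; either can be None when a
    # result has no link or the page fetch fails.
    print(res["link"])
    print((res["text"] or "")[:200])  # first 200 characters of the scraped page text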
@@ -49,10 +132,18 @@ def transcribe(audio_path):
 
     return text
 
-def model(text):
-    formatted_prompt = system_instructions1 + text + "[JARVIS]"
-    stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
-    return "".join([response.token.text for response in stream if response.token.text != "</s>"])
+def model(text, web_search):
+    if web_search is True:
+        # Perform a web search, feed the results to the language model, and return the answer.
+        web_results = search(text)
+        web2 = ' '.join([f"Link: {res['link']}\nText: {res['text']}\n\n" for res in web_results])
+        formatted_prompt = system_instructions1 + text + "[WEB]" + str(web2) + "[ANSWER]"
+        stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
+        return "".join([response.token.text for response in stream if response.token.text != "</s>"])
+    else:
+        formatted_prompt = system_instructions1 + text + "[JARVIS]"
+        stream = client1.text_generation(formatted_prompt, max_new_tokens=512, stream=True, details=True, return_full_text=False)
+        return "".join([response.token.text for response in stream if response.token.text != "</s>"])
 
 async def respond(audio):
     user = transcribe(audio)
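(Editor's note, not part of the commit: a quick sketch of the two code paths the reworked model() exposes, assuming app.py's client1 and system_instructions1 are already initialised; the question is illustrative.)

# Illustrative sketch only -- not in the commit.
# Plain path: the prompt ends with the "[JARVIS]" marker.
reply = model("Who designed the Eiffel Tower?", web_search=False)
# Grounded path: scraped results are spliced in between "[WEB]" and "[ANSWER]".
grounded_reply = model("Who designed the Eiffel Tower?", web_search=True)
print(reply)
print(grounded_reply)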
@@ -65,9 +156,11 @@ async def respond(audio):
 
 with gr.Blocks() as demo:
     with gr.Row():
+        web_search = gr.Checkbox(label="Web Search", value=False)
         input = gr.Audio(label="Voice Chat (BETA)", sources="microphone", type="filepath", waveform_options=False)
+    with gr.Row():
         output = gr.Audio(label="JARVIS", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
-    gr.Interface(fn=respond, inputs=[input], outputs=[output], live=True)
+    gr.Interface(fn=respond, inputs=[input, web_search], outputs=[output], live=True)
 
 if __name__ == "__main__":
     demo.queue(max_size=200).launch()
 
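(Editor's note, not part of the commit: gr.Interface now passes two inputs, [input, web_search], but the hunk above still shows respond() taking only audio. A follow-up along these lines would presumably be needed for the checkbox value to reach model(); the rest of respond()'s body lies outside this diff, so only the argument threading is sketched.)

# Hypothetical follow-up -- not in this commit. Threads the Web Search checkbox
# value through to model(); transcribe() and model() are the functions defined above.
async def respond(audio, web_search):
    user = transcribe(audio)
    reply = model(user, web_search)
    # ...the remainder of respond() (text-to-speech on `reply` and returning the
    # audio file path) is outside this diff and assumed unchanged.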