arabellastrange committed
Commit a8c00ab
1 Parent(s): e60d5a7
Files changed (2)
  1. app.py +2 -3
  2. web_search.py +4 -9
app.py CHANGED
@@ -1,6 +1,5 @@
  import logging
  import os
- from subprocess import check_call, STDOUT
  from time import asctime

  import gradio as gr
@@ -21,6 +20,7 @@ def google_search_chat(message, history):
      condensed_question = condense_question(message, history)
      if is_search_query(condensed_question):
          search_results = search(message, condensed_question)
+         print(f'Search results returned: {len(search_results)}')
          relevant_content = ""
          sources = ""
          for index, result in enumerate(search_results):
@@ -30,7 +30,7 @@ def google_search_chat(message, history):
          if relevant_content != "":
              documents = [Document(text=relevant_content)]
              index = VectorStoreIndex.from_documents(documents)
-
+             print('Search results vectorized...')
              response = generate_chat_response_with_history_rag_return_response(index, message, history)

              # similar_str = "not calculated"
@@ -105,7 +105,6 @@ if __name__ == '__main__':
      # https://openai.com/blog/new-embedding-models-and-api-updates
      set_llm(key=api_key, model="gpt-4-0125-preview", temperature=0)

-
      print("Launching Gradio ChatInterface for searchbot...")

      demo = gr.ChatInterface(fn=google_search_chat,
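
For context, the two new print calls trace the search, vectorize, and respond steps that google_search_chat runs with llama_index. Below is a minimal, self-contained sketch of the vectorize-and-query step only; the llama_index.core import path, the helper name answer_from_search_text, and the as_query_engine() usage are illustrative assumptions, not code from this repo, which answers through generate_chat_response_with_history_rag_return_response instead.

# Minimal sketch, assuming llama_index >= 0.10 (llama_index.core import path)
# and an OPENAI_API_KEY in the environment. answer_from_search_text is a
# hypothetical helper, not part of this repo.
from llama_index.core import Document, VectorStoreIndex

def answer_from_search_text(relevant_content: str, question: str) -> str:
    documents = [Document(text=relevant_content)]
    index = VectorStoreIndex.from_documents(documents)  # embeds the gathered page text
    print('Search results vectorized...')               # same diagnostic as the diff above
    query_engine = index.as_query_engine()              # the repo uses its chat-history RAG helper instead
    return str(query_engine.query(question))

# Example (hypothetical input):
# print(answer_from_search_text("Gradio is a Python library for ML demos.", "What is Gradio?"))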
web_search.py CHANGED
@@ -17,9 +17,6 @@ from llmsearch import utilityV2 as ut
  logger = logging.getLogger("agent_logger")


- # todo drop blocked pages > see og llmsearch code
- # todo use the chatcondesemode query instead of the new gpt query
-
  def search(msg, query_phrase):
      try:
          # this call extracts keywords from the statement and rewrites it into a better search phrase with gpt3.5
@@ -63,10 +60,10 @@ def process_url(url):
          if len(result) > 0:
              if "an error has occurred" not in result.lower() and "permission to view this page" not in result.lower() and "403 ERROR" not in result.lower() and "have been blocked" not in result.lower() and "too many requests" not in result.lower():
                  processed_page = {
-                     "source": ut.extract_domain(url),
-                     "url": url,
-                     "text": result,
-                 }
+                     "source": ut.extract_domain(url),
+                     "url": url,
+                     "text": result,
+                 }
                  print(f"Processed {url}: {len(result)} {int((time.time() - start_time) * 1000)} ms")
                  return processed_page
      except Exception:
@@ -85,7 +82,6 @@ def process_urls(urls):
      try:
          with ThreadPoolExecutor(max_workers=len(urls)) as pool:
              for result in pool.map(process_url, urls):
-                 print(f'returned {result}')
                  results.append(result)
      except:
          traceback.print_exc()
@@ -168,7 +164,6 @@ def search_google(original_query, query_phrase):
          if val is not None
      ]
      all_urls = copy.deepcopy(urls)
-     # initialize scan of Google urls
      start_wall_time = time.time()
      full_text = process_urls(all_urls)
      print(f"***** urls_processed {int((time.time() - start_wall_time) * 10) / 10} sec")