Spaces:
Sleeping
Sleeping
arabellastrange
committed on
Commit
•
a8c00ab
1
Parent(s):
e60d5a7
logging
Browse files
- app.py +2 -3
- web_search.py +4 -9
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import logging
|
2 |
import os
|
3 |
-
from subprocess import check_call, STDOUT
|
4 |
from time import asctime
|
5 |
|
6 |
import gradio as gr
|
@@ -21,6 +20,7 @@ def google_search_chat(message, history):
|
|
21 |
condensed_question = condense_question(message, history)
|
22 |
if is_search_query(condensed_question):
|
23 |
search_results = search(message, condensed_question)
|
|
|
24 |
relevant_content = ""
|
25 |
sources = ""
|
26 |
for index, result in enumerate(search_results):
|
@@ -30,7 +30,7 @@ def google_search_chat(message, history):
|
|
30 |
if relevant_content != "":
|
31 |
documents = [Document(text=relevant_content)]
|
32 |
index = VectorStoreIndex.from_documents(documents)
|
33 |
-
|
34 |
response = generate_chat_response_with_history_rag_return_response(index, message, history)
|
35 |
|
36 |
# similar_str = "not calculated"
|
@@ -105,7 +105,6 @@ if __name__ == '__main__':
|
|
105 |
# https://openai.com/blog/new-embedding-models-and-api-updates
|
106 |
set_llm(key=api_key, model="gpt-4-0125-preview", temperature=0)
|
107 |
|
108 |
-
|
109 |
print("Launching Gradio ChatInterface for searchbot...")
|
110 |
|
111 |
demo = gr.ChatInterface(fn=google_search_chat,
|
|
|
1 |
import logging
|
2 |
import os
|
|
|
3 |
from time import asctime
|
4 |
|
5 |
import gradio as gr
|
|
|
20 |
condensed_question = condense_question(message, history)
|
21 |
if is_search_query(condensed_question):
|
22 |
search_results = search(message, condensed_question)
|
23 |
+
print(f'Search results returned: {len(search_results)}')
|
24 |
relevant_content = ""
|
25 |
sources = ""
|
26 |
for index, result in enumerate(search_results):
|
|
|
30 |
if relevant_content != "":
|
31 |
documents = [Document(text=relevant_content)]
|
32 |
index = VectorStoreIndex.from_documents(documents)
|
33 |
+
print('Search results vectorized...')
|
34 |
response = generate_chat_response_with_history_rag_return_response(index, message, history)
|
35 |
|
36 |
# similar_str = "not calculated"
|
|
|
105 |
# https://openai.com/blog/new-embedding-models-and-api-updates
|
106 |
set_llm(key=api_key, model="gpt-4-0125-preview", temperature=0)
|
107 |
|
|
|
108 |
print("Launching Gradio ChatInterface for searchbot...")
|
109 |
|
110 |
demo = gr.ChatInterface(fn=google_search_chat,
|
web_search.py
CHANGED
@@ -17,9 +17,6 @@ from llmsearch import utilityV2 as ut
|
|
17 |
logger = logging.getLogger("agent_logger")
|
18 |
|
19 |
|
20 |
-
# todo drop blocked pages > see og llmsearch code
|
21 |
-
# todo use the chatcondesemode query instead of the new gpt query
|
22 |
-
|
23 |
def search(msg, query_phrase):
|
24 |
try:
|
25 |
# this call extracts keywords from the statement and rewrites it into a better search phrase with gpt3.5
|
@@ -63,10 +60,10 @@ def process_url(url):
|
|
63 |
if len(result) > 0:
|
64 |
if "an error has occurred" not in result.lower() and "permission to view this page" not in result.lower() and "403 ERROR" not in result.lower() and "have been blocked" not in result.lower() and "too many requests" not in result.lower():
|
65 |
processed_page = {
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
print(f"Processed {url}: {len(result)} {int((time.time() - start_time) * 1000)} ms")
|
71 |
return processed_page
|
72 |
except Exception:
|
@@ -85,7 +82,6 @@ def process_urls(urls):
|
|
85 |
try:
|
86 |
with ThreadPoolExecutor(max_workers=len(urls)) as pool:
|
87 |
for result in pool.map(process_url, urls):
|
88 |
-
print(f'returned {result}')
|
89 |
results.append(result)
|
90 |
except:
|
91 |
traceback.print_exc()
|
@@ -168,7 +164,6 @@ def search_google(original_query, query_phrase):
|
|
168 |
if val is not None
|
169 |
]
|
170 |
all_urls = copy.deepcopy(urls)
|
171 |
-
# initialize scan of Google urls
|
172 |
start_wall_time = time.time()
|
173 |
full_text = process_urls(all_urls)
|
174 |
print(f"***** urls_processed {int((time.time() - start_wall_time) * 10) / 10} sec")
|
|
|
17 |
logger = logging.getLogger("agent_logger")
|
18 |
|
19 |
|
|
|
|
|
|
|
20 |
def search(msg, query_phrase):
|
21 |
try:
|
22 |
# this call extracts keywords from the statement and rewrites it into a better search phrase with gpt3.5
|
|
|
60 |
if len(result) > 0:
|
61 |
if "an error has occurred" not in result.lower() and "permission to view this page" not in result.lower() and "403 ERROR" not in result.lower() and "have been blocked" not in result.lower() and "too many requests" not in result.lower():
|
62 |
processed_page = {
|
63 |
+
"source": ut.extract_domain(url),
|
64 |
+
"url": url,
|
65 |
+
"text": result,
|
66 |
+
}
|
67 |
print(f"Processed {url}: {len(result)} {int((time.time() - start_time) * 1000)} ms")
|
68 |
return processed_page
|
69 |
except Exception:
|
|
|
82 |
try:
|
83 |
with ThreadPoolExecutor(max_workers=len(urls)) as pool:
|
84 |
for result in pool.map(process_url, urls):
|
|
|
85 |
results.append(result)
|
86 |
except:
|
87 |
traceback.print_exc()
|
|
|
164 |
if val is not None
|
165 |
]
|
166 |
all_urls = copy.deepcopy(urls)
|
|
|
167 |
start_wall_time = time.time()
|
168 |
full_text = process_urls(all_urls)
|
169 |
print(f"***** urls_processed {int((time.time() - start_wall_time) * 10) / 10} sec")
|