arabellastrange committed
Commit a8c00ab
1 Parent(s): e60d5a7
Files changed (2)
  1. app.py +2 -3
  2. web_search.py +4 -9
app.py CHANGED
@@ -1,6 +1,5 @@
  import logging
  import os
- from subprocess import check_call, STDOUT
  from time import asctime

  import gradio as gr
@@ -21,6 +20,7 @@ def google_search_chat(message, history):
      condensed_question = condense_question(message, history)
      if is_search_query(condensed_question):
          search_results = search(message, condensed_question)
+         print(f'Search results returned: {len(search_results)}')
          relevant_content = ""
          sources = ""
          for index, result in enumerate(search_results):
@@ -30,7 +30,7 @@ def google_search_chat(message, history):
          if relevant_content != "":
              documents = [Document(text=relevant_content)]
              index = VectorStoreIndex.from_documents(documents)
-
+             print('Search results vectorized...')
              response = generate_chat_response_with_history_rag_return_response(index, message, history)

              # similar_str = "not calculated"
@@ -105,7 +105,6 @@ if __name__ == '__main__':
      # https://openai.com/blog/new-embedding-models-and-api-updates
      set_llm(key=api_key, model="gpt-4-0125-preview", temperature=0)

-
      print("Launching Gradio ChatInterface for searchbot...")

      demo = gr.ChatInterface(fn=google_search_chat,
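
For context, the two new print calls trace the search, vectorize, and respond steps that google_search_chat runs with llama_index. Below is a minimal, self-contained sketch of the vectorize-and-query step only; the llama_index.core import path, the helper name answer_from_search_text, and the as_query_engine() usage are illustrative assumptions, not code from this repo, which answers through generate_chat_response_with_history_rag_return_response instead.

# Minimal sketch, assuming llama_index >= 0.10 (llama_index.core import path)
# and an OPENAI_API_KEY in the environment. answer_from_search_text is a
# hypothetical helper, not part of this repo.
from llama_index.core import Document, VectorStoreIndex

def answer_from_search_text(relevant_content: str, question: str) -> str:
    documents = [Document(text=relevant_content)]
    index = VectorStoreIndex.from_documents(documents)  # embeds the gathered page text
    print('Search results vectorized...')               # same diagnostic as the diff above
    query_engine = index.as_query_engine()              # the repo uses its chat-history RAG helper instead
    return str(query_engine.query(question))

# Example (hypothetical input):
# print(answer_from_search_text("Gradio is a Python library for ML demos.", "What is Gradio?"))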
web_search.py CHANGED
@@ -17,9 +17,6 @@ from llmsearch import utilityV2 as ut
  logger = logging.getLogger("agent_logger")


- # todo drop blocked pages > see og llmsearch code
- # todo use the chatcondesemode query instead of the new gpt query
-
  def search(msg, query_phrase):
      try:
          # this call extracts keywords from the statement and rewrites it into a better search phrase with gpt3.5
@@ -63,10 +60,10 @@ def process_url(url):
          if len(result) > 0:
              if "an error has occurred" not in result.lower() and "permission to view this page" not in result.lower() and "403 ERROR" not in result.lower() and "have been blocked" not in result.lower() and "too many requests" not in result.lower():
                  processed_page = {
-                     "source": ut.extract_domain(url),
-                     "url": url,
-                     "text": result,
-                 }
+                     "source": ut.extract_domain(url),
+                     "url": url,
+                     "text": result,
+                 }
                  print(f"Processed {url}: {len(result)} {int((time.time() - start_time) * 1000)} ms")
                  return processed_page
      except Exception:
@@ -85,7 +82,6 @@ def process_urls(urls):
      try:
          with ThreadPoolExecutor(max_workers=len(urls)) as pool:
              for result in pool.map(process_url, urls):
-                 print(f'returned {result}')
                  results.append(result)
      except:
          traceback.print_exc()
@@ -168,7 +164,6 @@ def search_google(original_query, query_phrase):
          if val is not None
      ]
      all_urls = copy.deepcopy(urls)
-     # initialize scan of Google urls
      start_wall_time = time.time()
      full_text = process_urls(all_urls)
      print(f"***** urls_processed {int((time.time() - start_wall_time) * 10) / 10} sec")