Spaces:

arabellastrange
/

search-assistant

Sleeping

App Files

arabellastrange committed on Jul 29, 2024

Commit

428be9a

•

1 Parent(s): d29bff5

added fallback documents, trying to make gpt responses longer

Browse files

Files changed (21) hide show

.gitattributes +1 -0
app.py +12 -48
generate_response.py +5 -4
read_write_index.py +1 -2
search_data/7 of the World’s Most Poisonous Mushrooms _ Britannica.txt +3 -0
search_data/Advanced Outlook.com security for Microsoft 365 subscribers - Microsoft Support.pdf +3 -0
search_data/DIY egg substitutes _ PCC Community Markets.pdf +3 -0
search_data/Delay or schedule sending email messages in Outlook - Microsoft Support.pdf +3 -0
search_data/Eggless Carrot Cake Recipe - Swasthi's Recipes.pdf +3 -0
search_data/Eggless Carrot Walnut Cake - MJ and Hungryman.pdf +3 -0
search_data/Foraging for Mushrooms in Italy.pdf +3 -0
search_data/Mushroom hunting in Italy - Forests and trees _ funghimagazine.it.pdf +3 -0
search_data/Mushroom poisoning - Wikipedia.pdf +3 -0
search_data/Post-Impressionism - Wikipedia.pdf +3 -0
search_data/Post-Impressionism Movement Overview _ TheArtStory.pdf +3 -0
search_data/Post-Impressionism _ Cézanne, Van Gogh & Gauguin _ Britannica.pdf +3 -0
search_data/Post-Impressionism _ Essay _ The Metropolitan Museum of Art _ Heilbrunn Timeline of Art History.pdf +3 -0
search_data/Proton Mail_ Get a private, secure, and encrypted email account _ Proton.pdf +3 -0
search_data/Scheduling emails and HIPAA compliance.pdf +3 -0
search_data/Security and Privacy in Outlook Web App - Microsoft Support.pdf +3 -0
search_data/Tree nut and peanut allergy _ healthdirect.pdf +3 -0

.gitattributes CHANGED Viewed

@@ -44,3 +44,4 @@ chromedriver-linux64/ filter=lfs diff=lfs merge=lfs -text
 *.md !text !filter !merge !diff
 /chromedriver-linux64/* filter=lfs diff=lfs merge=lfs -text

 *.md !text !filter !merge !diff
 /chromedriver-linux64/* filter=lfs diff=lfs merge=lfs -text
+/search_data/* filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from llama_index.core import Document, VectorStoreIndex
 from generate_response import generate_chat_response_with_history, set_llm, is_search_query, condense_question, \
     generate_chat_response_with_history_rag_return_response
 from web_search import search
 API_KEY_PATH = "../keys/gpt_api_key.txt"
@@ -22,64 +23,27 @@ def google_search_chat(message, history):
         search_results = search(message, condensed_question)
         print(f'Search results returned: {len(search_results)}')
         relevant_content = ""
-        sources = ""
         for index, result in enumerate(search_results):
             relevant_content = relevant_content + "\n" + ''.join(result['text'])
-            sources = sources + f'\n {index + 1}. ' + result['url']  # python is zero-indexed
         if relevant_content != "":
             documents = [Document(text=relevant_content)]
             index = VectorStoreIndex.from_documents(documents)
             print('Search results vectorized...')
             response = generate_chat_response_with_history_rag_return_response(index, message, history)
-            # similar_str = "not calculated"
-            # faithfulness_str = "not calculated"
-            #
-            # if rag_similarity:
-            #     sim_evaluator = SemanticSimilarityEvaluator()
-            #     faith_evaluator = FaithfulnessEvaluator(llm=get_llm())
-            #     # condensed_context = condense_context(relevant_content)
-            #     # logger.info("Calculating similarity...")
-            #     # similar = sim_evaluator.evaluate(response=str(response),
-            #     #                                   reference=condensed_context)
-            #     logger.info("Calculating faithfulness...")
-            #     faithfulness = faith_evaluator.evaluate_response(query=condensed_question, response=response)
-            #     # similar_str = str(round((similar.score * 100), 2)) + "%"
-            #     faithfulness_str = "Yes" if faithfulness.passing else "No"
-            #
-            # logger.info(f'**Search Query:** {condensed_question} \n **Faithfulness:** {faithfulness_str} \n '
-            #             f'**Similarity:** {similar_str} \n **Sources used:** \n {sources}')
-            response_text = []
-            string_output = ""
-            for text in response.response_gen:
-                response_text.append(text)
-                string_output = ''.join(response_text)
-                yield string_output
-            # if not sourced:
-            #     pass
-            # if sourced and not query and not rag_similarity:
-            #     yield string_output + f'\n\n --- \n **Sources used:** \n {sources}'
-            # if sourced and query and not rag_similarity:
-            #     yield (string_output
-            #            + f'\n\n --- \n **Search Query:** {condensed_question} '
-            #              f'\n **Sources used:** \n {sources}')
-            # if rag_similarity:
-            #     yield (string_output
-            #            + f'\n\n --- \n **Search Query:** {condensed_question} \n '
-            #            # f'**Similarity of response to the sources [ℹ️]'
-            #            # f'(https://en.wikipedia.org/wiki/Semantic_similarity):** {similar_str} \n'
-            #              f'**Is response in source documents?**: {faithfulness_str}'
-            #              f'\n **Sources used:** \n {sources}')
-            print(f'Assistant Response: {string_output}')
-        else:
-            print(
-                f'Assistant Response: Sorry, no search results found.')
-            yield "Sorry, no search results found."
     else:
         yield from generate_chat_response_with_history(message, history)

 from generate_response import generate_chat_response_with_history, set_llm, is_search_query, condense_question, \
     generate_chat_response_with_history_rag_return_response
+from read_write_index import read_write_index
 from web_search import search
 API_KEY_PATH = "../keys/gpt_api_key.txt"
         search_results = search(message, condensed_question)
         print(f'Search results returned: {len(search_results)}')
         relevant_content = ""
         for index, result in enumerate(search_results):
             relevant_content = relevant_content + "\n" + ''.join(result['text'])
         if relevant_content != "":
             documents = [Document(text=relevant_content)]
             index = VectorStoreIndex.from_documents(documents)
             print('Search results vectorized...')
             response = generate_chat_response_with_history_rag_return_response(index, message, history)
+        else:
+            print(f'Assistant Response: Sorry, no search results found, trying with offline resources.')
+            index = read_write_index(path='storage_search/')
+            response = generate_chat_response_with_history_rag_return_response(index, message, history)
+        response_text = []
+        string_output = ""
+        for text in response.response_gen:
+            response_text.append(text)
+            string_output = ''.join(response_text)
+            yield string_output
+        print(f'Assistant Response: {string_output}')
     else:
         yield from generate_chat_response_with_history(message, history)

generate_response.py CHANGED Viewed

@@ -24,7 +24,7 @@ def set_llm(model, key, temperature):
     logger.info(f'Setting up LLM with {model} and associated embedding model...')
     if "gpt" in model:
-        llm = OpenAI(api_key=key, temperature=temperature, model=model)
         embed_model = OpenAIEmbedding(api_key=key)
     elif "mistral" in model:
         llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
@@ -49,7 +49,7 @@ def generate_query_response(index, message):
     query_engine = index.as_query_engine(streaming=True, chat_mode=ChatMode.CONDENSE_QUESTION)
     logger.info(f'Input user message: {message}')
-    response = query_engine.query(message)
     response_text = []
     for text in response.response_gen:
@@ -75,7 +75,7 @@ def generate_chat_response_with_history(message, history):
 def generate_chat_response_with_history_rag_return_response(index, message, history):
     logger.info("Generating chat response with history and rag...")
     messages = collect_history(message, history)
     logger.info("Creating query engine with index...")
@@ -83,9 +83,10 @@ def generate_chat_response_with_history_rag_return_response(index, message, hist
     return query_engine.stream_chat(messages)
 def generate_chat_response_with_history_rag_yield_string(index, message, history):
     logger.info("Generating chat response with history and rag...")
     string_output = ""
     messages = collect_history(message, history)

     logger.info(f'Setting up LLM with {model} and associated embedding model...')
     if "gpt" in model:
+        llm = OpenAI(api_key=key, temperature=temperature, model=model, )
         embed_model = OpenAIEmbedding(api_key=key)
     elif "mistral" in model:
         llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
     query_engine = index.as_query_engine(streaming=True, chat_mode=ChatMode.CONDENSE_QUESTION)
     logger.info(f'Input user message: {message}')
+    response = query_engine.query(f"Write a comprehensive but concise response to this query: \n '{message}'")
     response_text = []
     for text in response.response_gen:
 def generate_chat_response_with_history_rag_return_response(index, message, history):
     logger.info("Generating chat response with history and rag...")
+    message = f"Write a comprehensive but concise response to this query: \n '{message}'"
     messages = collect_history(message, history)
     logger.info("Creating query engine with index...")
     return query_engine.stream_chat(messages)
 def generate_chat_response_with_history_rag_yield_string(index, message, history):
     logger.info("Generating chat response with history and rag...")
+    message = f"Write a comprehensive but concise response to this query: \n '{message}'"
     string_output = ""
     messages = collect_history(message, history)

read_write_index.py CHANGED Viewed

@@ -4,8 +4,7 @@ import os
 from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
 logger = logging.getLogger(__name__)
-DOCUMENT_PATH = '../data'
 # remember to delete stored vectors when new documents are added to the data so the storage is recreated

 from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
 logger = logging.getLogger(__name__)
+DOCUMENT_PATH = 'search_data/'
 # remember to delete stored vectors when new documents are added to the data so the storage is recreated

search_data/7 of the World’s Most Poisonous Mushrooms _ Britannica.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a21b2ba1a09501339d689875a777692913dd6e2c24f54896e8785c7a8c5b2dca
+size 23420

search_data/Advanced Outlook.com security for Microsoft 365 subscribers - Microsoft Support.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b37382476a7b8ed5f126f070ee532b3d57988b7b71df67500ba3abc62f49f13
+size 291345

search_data/DIY egg substitutes _ PCC Community Markets.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bc96523f1bc10ea200c8b48c7537da529a6dafb55de0dca8c94400954beb760
+size 102492

search_data/Delay or schedule sending email messages in Outlook - Microsoft Support.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73f34aa19820fdff64c68ee9b504303eafefff95ef04b497a0c7494731e775bb
+size 322375

search_data/Eggless Carrot Cake Recipe - Swasthi's Recipes.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e76cb0a825197235b937bc661f2b5364d54582489214c3eb29670a3035a5d8af
+size 11134105

search_data/Eggless Carrot Walnut Cake - MJ and Hungryman.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bcc010c6536a4aacc516cd89a988845afd63bcb3a2bcef4aa17385673d7c1ba
+size 3122188

search_data/Foraging for Mushrooms in Italy.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:64091c4875852606d124cf9f163ed718c76028ac722916f12674b7c6b4fefc87
+size 45431380

search_data/Mushroom hunting in Italy - Forests and trees _ funghimagazine.it.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5277ebdadbd744c565bfc64563836732727833e3b4e2f20037e7ccf791dedde4
+size 32428069

search_data/Mushroom poisoning - Wikipedia.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:891de6630d9a405e74c9ec62c574dbf0480ba4bc4fad3b56a627c4a4fbeb1748
+size 1885015

search_data/Post-Impressionism - Wikipedia.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf23c67aaf764f7627186013276ef4f12ae9deb34248561eef576255ef17be8e
+size 6314276

search_data/Post-Impressionism Movement Overview _ TheArtStory.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:011a9f805549f63ff405408df6a6b996f9aca3bc685da99684b3de9ed855f12e
+size 660112

search_data/Post-Impressionism _ Cézanne, Van Gogh & Gauguin _ Britannica.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4253305346b05efeabca0b81a90a31e0f19d0e8a86411bebd7d70b29f82b2e41
+size 3106606

search_data/Post-Impressionism _ Essay _ The Metropolitan Museum of Art _ Heilbrunn Timeline of Art History.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:838cebda5a22471967d460f6188beb7ced29c0c0f301caf8fa2b68a474f437d1
+size 112676

search_data/Proton Mail_ Get a private, secure, and encrypted email account _ Proton.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7600547cebb3b26121c6456c95b04deb46a28e1f75d843d4a2c24764f05f425d
+size 7216995

search_data/Scheduling emails and HIPAA compliance.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f120b3bbb190d853664a6cad818469bfd10b50d709fa9d9a3a5706953606a32
+size 3679160

search_data/Security and Privacy in Outlook Web App - Microsoft Support.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:75f8b44657e3f6513610690f2dfcbc86dbf31b4a005aa7ee6632a4a750513c8c
+size 228545

search_data/Tree nut and peanut allergy _ healthdirect.pdf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6bdd079b0fec4f37a99ede3df4ba2be51156162bd2b03ca435dbb7bb8bde51d4
+size 235370