Spaces:
Sleeping
Sleeping
arabellastrange
committed on
Commit
•
428be9a
1
Parent(s):
d29bff5
added fallback documents, trying to make gpt responses longer
Browse files
- .gitattributes +1 -0
- app.py +12 -48
- generate_response.py +5 -4
- read_write_index.py +1 -2
- search_data/7 of the World’s Most Poisonous Mushrooms _ Britannica.txt +3 -0
- search_data/Advanced Outlook.com security for Microsoft 365 subscribers - Microsoft Support.pdf +3 -0
- search_data/DIY egg substitutes _ PCC Community Markets.pdf +3 -0
- search_data/Delay or schedule sending email messages in Outlook - Microsoft Support.pdf +3 -0
- search_data/Eggless Carrot Cake Recipe - Swasthi's Recipes.pdf +3 -0
- search_data/Eggless Carrot Walnut Cake - MJ and Hungryman.pdf +3 -0
- search_data/Foraging for Mushrooms in Italy.pdf +3 -0
- search_data/Mushroom hunting in Italy - Forests and trees _ funghimagazine.it.pdf +3 -0
- search_data/Mushroom poisoning - Wikipedia.pdf +3 -0
- search_data/Post-Impressionism - Wikipedia.pdf +3 -0
- search_data/Post-Impressionism Movement Overview _ TheArtStory.pdf +3 -0
- search_data/Post-Impressionism _ Cézanne, Van Gogh & Gauguin _ Britannica.pdf +3 -0
- search_data/Post-Impressionism _ Essay _ The Metropolitan Museum of Art _ Heilbrunn Timeline of Art History.pdf +3 -0
- search_data/Proton Mail_ Get a private, secure, and encrypted email account _ Proton.pdf +3 -0
- search_data/Scheduling emails and HIPAA compliance.pdf +3 -0
- search_data/Security and Privacy in Outlook Web App - Microsoft Support.pdf +3 -0
- search_data/Tree nut and peanut allergy _ healthdirect.pdf +3 -0
.gitattributes
CHANGED
@@ -44,3 +44,4 @@ chromedriver-linux64/ filter=lfs diff=lfs merge=lfs -text
|
|
44 |
*.md !text !filter !merge !diff
|
45 |
/chromedriver-linux64/* filter=lfs diff=lfs merge=lfs -text
|
46 |
|
|
|
|
44 |
*.md !text !filter !merge !diff
|
45 |
/chromedriver-linux64/* filter=lfs diff=lfs merge=lfs -text
|
46 |
|
47 |
+
/search_data/* filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -7,6 +7,7 @@ from llama_index.core import Document, VectorStoreIndex
|
|
7 |
|
8 |
from generate_response import generate_chat_response_with_history, set_llm, is_search_query, condense_question, \
|
9 |
generate_chat_response_with_history_rag_return_response
|
|
|
10 |
from web_search import search
|
11 |
|
12 |
API_KEY_PATH = "../keys/gpt_api_key.txt"
|
@@ -22,64 +23,27 @@ def google_search_chat(message, history):
|
|
22 |
search_results = search(message, condensed_question)
|
23 |
print(f'Search results returned: {len(search_results)}')
|
24 |
relevant_content = ""
|
25 |
-
sources = ""
|
26 |
for index, result in enumerate(search_results):
|
27 |
relevant_content = relevant_content + "\n" + ''.join(result['text'])
|
28 |
-
sources = sources + f'\n {index + 1}. ' + result['url'] # python is zero-indexed
|
29 |
|
30 |
if relevant_content != "":
|
31 |
documents = [Document(text=relevant_content)]
|
32 |
index = VectorStoreIndex.from_documents(documents)
|
33 |
print('Search results vectorized...')
|
34 |
response = generate_chat_response_with_history_rag_return_response(index, message, history)
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
# sim_evaluator = SemanticSimilarityEvaluator()
|
41 |
-
# faith_evaluator = FaithfulnessEvaluator(llm=get_llm())
|
42 |
-
# # condensed_context = condense_context(relevant_content)
|
43 |
-
# # logger.info("Calculating similarity...")
|
44 |
-
# # similar = sim_evaluator.evaluate(response=str(response),
|
45 |
-
# # reference=condensed_context)
|
46 |
-
# logger.info("Calculating faithfulness...")
|
47 |
-
# faithfulness = faith_evaluator.evaluate_response(query=condensed_question, response=response)
|
48 |
-
# # similar_str = str(round((similar.score * 100), 2)) + "%"
|
49 |
-
# faithfulness_str = "Yes" if faithfulness.passing else "No"
|
50 |
-
#
|
51 |
-
# logger.info(f'**Search Query:** {condensed_question} \n **Faithfulness:** {faithfulness_str} \n '
|
52 |
-
# f'**Similarity:** {similar_str} \n **Sources used:** \n {sources}')
|
53 |
-
|
54 |
-
response_text = []
|
55 |
-
string_output = ""
|
56 |
-
|
57 |
-
for text in response.response_gen:
|
58 |
-
response_text.append(text)
|
59 |
-
string_output = ''.join(response_text)
|
60 |
-
yield string_output
|
61 |
-
|
62 |
-
# if not sourced:
|
63 |
-
# pass
|
64 |
-
# if sourced and not query and not rag_similarity:
|
65 |
-
# yield string_output + f'\n\n --- \n **Sources used:** \n {sources}'
|
66 |
-
# if sourced and query and not rag_similarity:
|
67 |
-
# yield (string_output
|
68 |
-
# + f'\n\n --- \n **Search Query:** {condensed_question} '
|
69 |
-
# f'\n **Sources used:** \n {sources}')
|
70 |
-
# if rag_similarity:
|
71 |
-
# yield (string_output
|
72 |
-
# + f'\n\n --- \n **Search Query:** {condensed_question} \n '
|
73 |
-
# # f'**Similarity of response to the sources [ℹ️]'
|
74 |
-
# # f'(https://en.wikipedia.org/wiki/Semantic_similarity):** {similar_str} \n'
|
75 |
-
# f'**Is response in source documents?**: {faithfulness_str}'
|
76 |
-
# f'\n **Sources used:** \n {sources}')
|
77 |
|
78 |
-
|
79 |
-
else:
|
80 |
-
print(
|
81 |
-
f'Assistant Response: Sorry, no search results found.')
|
82 |
-
yield "Sorry, no search results found."
|
83 |
|
84 |
else:
|
85 |
yield from generate_chat_response_with_history(message, history)
|
|
|
7 |
|
8 |
from generate_response import generate_chat_response_with_history, set_llm, is_search_query, condense_question, \
|
9 |
generate_chat_response_with_history_rag_return_response
|
10 |
+
from read_write_index import read_write_index
|
11 |
from web_search import search
|
12 |
|
13 |
API_KEY_PATH = "../keys/gpt_api_key.txt"
|
|
|
23 |
search_results = search(message, condensed_question)
|
24 |
print(f'Search results returned: {len(search_results)}')
|
25 |
relevant_content = ""
|
|
|
26 |
for index, result in enumerate(search_results):
|
27 |
relevant_content = relevant_content + "\n" + ''.join(result['text'])
|
|
|
28 |
|
29 |
if relevant_content != "":
|
30 |
documents = [Document(text=relevant_content)]
|
31 |
index = VectorStoreIndex.from_documents(documents)
|
32 |
print('Search results vectorized...')
|
33 |
response = generate_chat_response_with_history_rag_return_response(index, message, history)
|
34 |
+
else:
|
35 |
+
print(f'Assistant Response: Sorry, no search results found, trying with offline resources.')
|
36 |
+
index = read_write_index(path='storage_search/')
|
37 |
+
response = generate_chat_response_with_history_rag_return_response(index, message, history)
|
38 |
+
response_text = []
|
39 |
+
string_output = ""
|
40 |
|
41 |
+
for text in response.response_gen:
|
42 |
+
response_text.append(text)
|
43 |
+
string_output = ''.join(response_text)
|
44 |
+
yield string_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
+
print(f'Assistant Response: {string_output}')
|
|
|
|
|
|
|
|
|
47 |
|
48 |
else:
|
49 |
yield from generate_chat_response_with_history(message, history)
|
generate_response.py
CHANGED
@@ -24,7 +24,7 @@ def set_llm(model, key, temperature):
|
|
24 |
logger.info(f'Setting up LLM with {model} and associated embedding model...')
|
25 |
|
26 |
if "gpt" in model:
|
27 |
-
llm = OpenAI(api_key=key, temperature=temperature, model=model)
|
28 |
embed_model = OpenAIEmbedding(api_key=key)
|
29 |
elif "mistral" in model:
|
30 |
llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
|
@@ -49,7 +49,7 @@ def generate_query_response(index, message):
|
|
49 |
query_engine = index.as_query_engine(streaming=True, chat_mode=ChatMode.CONDENSE_QUESTION)
|
50 |
|
51 |
logger.info(f'Input user message: {message}')
|
52 |
-
response = query_engine.query(message)
|
53 |
|
54 |
response_text = []
|
55 |
for text in response.response_gen:
|
@@ -75,7 +75,7 @@ def generate_chat_response_with_history(message, history):
|
|
75 |
|
76 |
def generate_chat_response_with_history_rag_return_response(index, message, history):
|
77 |
logger.info("Generating chat response with history and rag...")
|
78 |
-
|
79 |
messages = collect_history(message, history)
|
80 |
|
81 |
logger.info("Creating query engine with index...")
|
@@ -83,9 +83,10 @@ def generate_chat_response_with_history_rag_return_response(index, message, hist
|
|
83 |
return query_engine.stream_chat(messages)
|
84 |
|
85 |
|
86 |
-
|
87 |
def generate_chat_response_with_history_rag_yield_string(index, message, history):
|
88 |
logger.info("Generating chat response with history and rag...")
|
|
|
|
|
89 |
string_output = ""
|
90 |
|
91 |
messages = collect_history(message, history)
|
|
|
24 |
logger.info(f'Setting up LLM with {model} and associated embedding model...')
|
25 |
|
26 |
if "gpt" in model:
|
27 |
+
llm = OpenAI(api_key=key, temperature=temperature, model=model, )
|
28 |
embed_model = OpenAIEmbedding(api_key=key)
|
29 |
elif "mistral" in model:
|
30 |
llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
|
|
|
49 |
query_engine = index.as_query_engine(streaming=True, chat_mode=ChatMode.CONDENSE_QUESTION)
|
50 |
|
51 |
logger.info(f'Input user message: {message}')
|
52 |
+
response = query_engine.query(f"Write a comprehensive but concise response to this query: \n '{message}'")
|
53 |
|
54 |
response_text = []
|
55 |
for text in response.response_gen:
|
|
|
75 |
|
76 |
def generate_chat_response_with_history_rag_return_response(index, message, history):
|
77 |
logger.info("Generating chat response with history and rag...")
|
78 |
+
message = f"Write a comprehensive but concise response to this query: \n '{message}'"
|
79 |
messages = collect_history(message, history)
|
80 |
|
81 |
logger.info("Creating query engine with index...")
|
|
|
83 |
return query_engine.stream_chat(messages)
|
84 |
|
85 |
|
|
|
86 |
def generate_chat_response_with_history_rag_yield_string(index, message, history):
|
87 |
logger.info("Generating chat response with history and rag...")
|
88 |
+
message = f"Write a comprehensive but concise response to this query: \n '{message}'"
|
89 |
+
|
90 |
string_output = ""
|
91 |
|
92 |
messages = collect_history(message, history)
|
read_write_index.py
CHANGED
@@ -4,8 +4,7 @@ import os
|
|
4 |
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
|
5 |
|
6 |
logger = logging.getLogger(__name__)
|
7 |
-
DOCUMENT_PATH = '
|
8 |
-
|
9 |
|
10 |
|
11 |
# remember to delete stored vectors when new documents are added to the data so the storage is recreated
|
|
|
4 |
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
|
5 |
|
6 |
logger = logging.getLogger(__name__)
|
7 |
+
DOCUMENT_PATH = 'search_data/'
|
|
|
8 |
|
9 |
|
10 |
# remember to delete stored vectors when new documents are added to the data so the storage is recreated
|
search_data/7 of the World’s Most Poisonous Mushrooms _ Britannica.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a21b2ba1a09501339d689875a777692913dd6e2c24f54896e8785c7a8c5b2dca
|
3 |
+
size 23420
|
search_data/Advanced Outlook.com security for Microsoft 365 subscribers - Microsoft Support.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b37382476a7b8ed5f126f070ee532b3d57988b7b71df67500ba3abc62f49f13
|
3 |
+
size 291345
|
search_data/DIY egg substitutes _ PCC Community Markets.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5bc96523f1bc10ea200c8b48c7537da529a6dafb55de0dca8c94400954beb760
|
3 |
+
size 102492
|
search_data/Delay or schedule sending email messages in Outlook - Microsoft Support.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73f34aa19820fdff64c68ee9b504303eafefff95ef04b497a0c7494731e775bb
|
3 |
+
size 322375
|
search_data/Eggless Carrot Cake Recipe - Swasthi's Recipes.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e76cb0a825197235b937bc661f2b5364d54582489214c3eb29670a3035a5d8af
|
3 |
+
size 11134105
|
search_data/Eggless Carrot Walnut Cake - MJ and Hungryman.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bcc010c6536a4aacc516cd89a988845afd63bcb3a2bcef4aa17385673d7c1ba
|
3 |
+
size 3122188
|
search_data/Foraging for Mushrooms in Italy.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64091c4875852606d124cf9f163ed718c76028ac722916f12674b7c6b4fefc87
|
3 |
+
size 45431380
|
search_data/Mushroom hunting in Italy - Forests and trees _ funghimagazine.it.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5277ebdadbd744c565bfc64563836732727833e3b4e2f20037e7ccf791dedde4
|
3 |
+
size 32428069
|
search_data/Mushroom poisoning - Wikipedia.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:891de6630d9a405e74c9ec62c574dbf0480ba4bc4fad3b56a627c4a4fbeb1748
|
3 |
+
size 1885015
|
search_data/Post-Impressionism - Wikipedia.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf23c67aaf764f7627186013276ef4f12ae9deb34248561eef576255ef17be8e
|
3 |
+
size 6314276
|
search_data/Post-Impressionism Movement Overview _ TheArtStory.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:011a9f805549f63ff405408df6a6b996f9aca3bc685da99684b3de9ed855f12e
|
3 |
+
size 660112
|
search_data/Post-Impressionism _ Cézanne, Van Gogh & Gauguin _ Britannica.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4253305346b05efeabca0b81a90a31e0f19d0e8a86411bebd7d70b29f82b2e41
|
3 |
+
size 3106606
|
search_data/Post-Impressionism _ Essay _ The Metropolitan Museum of Art _ Heilbrunn Timeline of Art History.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:838cebda5a22471967d460f6188beb7ced29c0c0f301caf8fa2b68a474f437d1
|
3 |
+
size 112676
|
search_data/Proton Mail_ Get a private, secure, and encrypted email account _ Proton.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7600547cebb3b26121c6456c95b04deb46a28e1f75d843d4a2c24764f05f425d
|
3 |
+
size 7216995
|
search_data/Scheduling emails and HIPAA compliance.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f120b3bbb190d853664a6cad818469bfd10b50d709fa9d9a3a5706953606a32
|
3 |
+
size 3679160
|
search_data/Security and Privacy in Outlook Web App - Microsoft Support.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75f8b44657e3f6513610690f2dfcbc86dbf31b4a005aa7ee6632a4a750513c8c
|
3 |
+
size 228545
|
search_data/Tree nut and peanut allergy _ healthdirect.pdf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bdd079b0fec4f37a99ede3df4ba2be51156162bd2b03ca435dbb7bb8bde51d4
|
3 |
+
size 235370
|