arabellastrange committed on
Commit
428be9a
1 Parent(s): d29bff5

added fallback documents, trying to make gpt responses longer

Browse files
.gitattributes CHANGED
@@ -44,3 +44,4 @@ chromedriver-linux64/ filter=lfs diff=lfs merge=lfs -text
44
  *.md !text !filter !merge !diff
45
  /chromedriver-linux64/* filter=lfs diff=lfs merge=lfs -text
46
 
 
 
44
  *.md !text !filter !merge !diff
45
  /chromedriver-linux64/* filter=lfs diff=lfs merge=lfs -text
46
 
47
+ /search_data/* filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -7,6 +7,7 @@ from llama_index.core import Document, VectorStoreIndex
7
 
8
  from generate_response import generate_chat_response_with_history, set_llm, is_search_query, condense_question, \
9
  generate_chat_response_with_history_rag_return_response
 
10
  from web_search import search
11
 
12
  API_KEY_PATH = "../keys/gpt_api_key.txt"
@@ -22,64 +23,27 @@ def google_search_chat(message, history):
22
  search_results = search(message, condensed_question)
23
  print(f'Search results returned: {len(search_results)}')
24
  relevant_content = ""
25
- sources = ""
26
  for index, result in enumerate(search_results):
27
  relevant_content = relevant_content + "\n" + ''.join(result['text'])
28
- sources = sources + f'\n {index + 1}. ' + result['url'] # python is zero-indexed
29
 
30
  if relevant_content != "":
31
  documents = [Document(text=relevant_content)]
32
  index = VectorStoreIndex.from_documents(documents)
33
  print('Search results vectorized...')
34
  response = generate_chat_response_with_history_rag_return_response(index, message, history)
 
 
 
 
 
 
35
 
36
- # similar_str = "not calculated"
37
- # faithfulness_str = "not calculated"
38
- #
39
- # if rag_similarity:
40
- # sim_evaluator = SemanticSimilarityEvaluator()
41
- # faith_evaluator = FaithfulnessEvaluator(llm=get_llm())
42
- # # condensed_context = condense_context(relevant_content)
43
- # # logger.info("Calculating similarity...")
44
- # # similar = sim_evaluator.evaluate(response=str(response),
45
- # # reference=condensed_context)
46
- # logger.info("Calculating faithfulness...")
47
- # faithfulness = faith_evaluator.evaluate_response(query=condensed_question, response=response)
48
- # # similar_str = str(round((similar.score * 100), 2)) + "%"
49
- # faithfulness_str = "Yes" if faithfulness.passing else "No"
50
- #
51
- # logger.info(f'**Search Query:** {condensed_question} \n **Faithfulness:** {faithfulness_str} \n '
52
- # f'**Similarity:** {similar_str} \n **Sources used:** \n {sources}')
53
-
54
- response_text = []
55
- string_output = ""
56
-
57
- for text in response.response_gen:
58
- response_text.append(text)
59
- string_output = ''.join(response_text)
60
- yield string_output
61
-
62
- # if not sourced:
63
- # pass
64
- # if sourced and not query and not rag_similarity:
65
- # yield string_output + f'\n\n --- \n **Sources used:** \n {sources}'
66
- # if sourced and query and not rag_similarity:
67
- # yield (string_output
68
- # + f'\n\n --- \n **Search Query:** {condensed_question} '
69
- # f'\n **Sources used:** \n {sources}')
70
- # if rag_similarity:
71
- # yield (string_output
72
- # + f'\n\n --- \n **Search Query:** {condensed_question} \n '
73
- # # f'**Similarity of response to the sources [ℹ️]'
74
- # # f'(https://en.wikipedia.org/wiki/Semantic_similarity):** {similar_str} \n'
75
- # f'**Is response in source documents?**: {faithfulness_str}'
76
- # f'\n **Sources used:** \n {sources}')
77
 
78
- print(f'Assistant Response: {string_output}')
79
- else:
80
- print(
81
- f'Assistant Response: Sorry, no search results found.')
82
- yield "Sorry, no search results found."
83
 
84
  else:
85
  yield from generate_chat_response_with_history(message, history)
 
7
 
8
  from generate_response import generate_chat_response_with_history, set_llm, is_search_query, condense_question, \
9
  generate_chat_response_with_history_rag_return_response
10
+ from read_write_index import read_write_index
11
  from web_search import search
12
 
13
  API_KEY_PATH = "../keys/gpt_api_key.txt"
 
23
  search_results = search(message, condensed_question)
24
  print(f'Search results returned: {len(search_results)}')
25
  relevant_content = ""
 
26
  for index, result in enumerate(search_results):
27
  relevant_content = relevant_content + "\n" + ''.join(result['text'])
 
28
 
29
  if relevant_content != "":
30
  documents = [Document(text=relevant_content)]
31
  index = VectorStoreIndex.from_documents(documents)
32
  print('Search results vectorized...')
33
  response = generate_chat_response_with_history_rag_return_response(index, message, history)
34
+ else:
35
+ print(f'Assistant Response: Sorry, no search results found, trying with offline resources.')
36
+ index = read_write_index(path='storage_search/')
37
+ response = generate_chat_response_with_history_rag_return_response(index, message, history)
38
+ response_text = []
39
+ string_output = ""
40
 
41
+ for text in response.response_gen:
42
+ response_text.append(text)
43
+ string_output = ''.join(response_text)
44
+ yield string_output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ print(f'Assistant Response: {string_output}')
 
 
 
 
47
 
48
  else:
49
  yield from generate_chat_response_with_history(message, history)
generate_response.py CHANGED
@@ -24,7 +24,7 @@ def set_llm(model, key, temperature):
24
  logger.info(f'Setting up LLM with {model} and associated embedding model...')
25
 
26
  if "gpt" in model:
27
- llm = OpenAI(api_key=key, temperature=temperature, model=model)
28
  embed_model = OpenAIEmbedding(api_key=key)
29
  elif "mistral" in model:
30
  llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
@@ -49,7 +49,7 @@ def generate_query_response(index, message):
49
  query_engine = index.as_query_engine(streaming=True, chat_mode=ChatMode.CONDENSE_QUESTION)
50
 
51
  logger.info(f'Input user message: {message}')
52
- response = query_engine.query(message)
53
 
54
  response_text = []
55
  for text in response.response_gen:
@@ -75,7 +75,7 @@ def generate_chat_response_with_history(message, history):
75
 
76
  def generate_chat_response_with_history_rag_return_response(index, message, history):
77
  logger.info("Generating chat response with history and rag...")
78
-
79
  messages = collect_history(message, history)
80
 
81
  logger.info("Creating query engine with index...")
@@ -83,9 +83,10 @@ def generate_chat_response_with_history_rag_return_response(index, message, hist
83
  return query_engine.stream_chat(messages)
84
 
85
 
86
-
87
  def generate_chat_response_with_history_rag_yield_string(index, message, history):
88
  logger.info("Generating chat response with history and rag...")
 
 
89
  string_output = ""
90
 
91
  messages = collect_history(message, history)
 
24
  logger.info(f'Setting up LLM with {model} and associated embedding model...')
25
 
26
  if "gpt" in model:
27
+ llm = OpenAI(api_key=key, temperature=temperature, model=model, )
28
  embed_model = OpenAIEmbedding(api_key=key)
29
  elif "mistral" in model:
30
  llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
 
49
  query_engine = index.as_query_engine(streaming=True, chat_mode=ChatMode.CONDENSE_QUESTION)
50
 
51
  logger.info(f'Input user message: {message}')
52
+ response = query_engine.query(f"Write a comprehensive but concise response to this query: \n '{message}'")
53
 
54
  response_text = []
55
  for text in response.response_gen:
 
75
 
76
  def generate_chat_response_with_history_rag_return_response(index, message, history):
77
  logger.info("Generating chat response with history and rag...")
78
+ message = f"Write a comprehensive but concise response to this query: \n '{message}'"
79
  messages = collect_history(message, history)
80
 
81
  logger.info("Creating query engine with index...")
 
83
  return query_engine.stream_chat(messages)
84
 
85
 
 
86
  def generate_chat_response_with_history_rag_yield_string(index, message, history):
87
  logger.info("Generating chat response with history and rag...")
88
+ message = f"Write a comprehensive but concise response to this query: \n '{message}'"
89
+
90
  string_output = ""
91
 
92
  messages = collect_history(message, history)
read_write_index.py CHANGED
@@ -4,8 +4,7 @@ import os
4
  from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
5
 
6
  logger = logging.getLogger(__name__)
7
- DOCUMENT_PATH = '../data'
8
-
9
 
10
 
11
  # remember to delete stored vectors when new documents are added to the data so the storage is recreated
 
4
  from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage
5
 
6
  logger = logging.getLogger(__name__)
7
+ DOCUMENT_PATH = 'search_data/'
 
8
 
9
 
10
  # remember to delete stored vectors when new documents are added to the data so the storage is recreated
search_data/7 of the World’s Most Poisonous Mushrooms _ Britannica.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a21b2ba1a09501339d689875a777692913dd6e2c24f54896e8785c7a8c5b2dca
3
+ size 23420
search_data/Advanced Outlook.com security for Microsoft 365 subscribers - Microsoft Support.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b37382476a7b8ed5f126f070ee532b3d57988b7b71df67500ba3abc62f49f13
3
+ size 291345
search_data/DIY egg substitutes _ PCC Community Markets.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bc96523f1bc10ea200c8b48c7537da529a6dafb55de0dca8c94400954beb760
3
+ size 102492
search_data/Delay or schedule sending email messages in Outlook - Microsoft Support.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73f34aa19820fdff64c68ee9b504303eafefff95ef04b497a0c7494731e775bb
3
+ size 322375
search_data/Eggless Carrot Cake Recipe - Swasthi's Recipes.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e76cb0a825197235b937bc661f2b5364d54582489214c3eb29670a3035a5d8af
3
+ size 11134105
search_data/Eggless Carrot Walnut Cake - MJ and Hungryman.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bcc010c6536a4aacc516cd89a988845afd63bcb3a2bcef4aa17385673d7c1ba
3
+ size 3122188
search_data/Foraging for Mushrooms in Italy.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64091c4875852606d124cf9f163ed718c76028ac722916f12674b7c6b4fefc87
3
+ size 45431380
search_data/Mushroom hunting in Italy - Forests and trees _ funghimagazine.it.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5277ebdadbd744c565bfc64563836732727833e3b4e2f20037e7ccf791dedde4
3
+ size 32428069
search_data/Mushroom poisoning - Wikipedia.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:891de6630d9a405e74c9ec62c574dbf0480ba4bc4fad3b56a627c4a4fbeb1748
3
+ size 1885015
search_data/Post-Impressionism - Wikipedia.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf23c67aaf764f7627186013276ef4f12ae9deb34248561eef576255ef17be8e
3
+ size 6314276
search_data/Post-Impressionism Movement Overview _ TheArtStory.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:011a9f805549f63ff405408df6a6b996f9aca3bc685da99684b3de9ed855f12e
3
+ size 660112
search_data/Post-Impressionism _ Cézanne, Van Gogh & Gauguin _ Britannica.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4253305346b05efeabca0b81a90a31e0f19d0e8a86411bebd7d70b29f82b2e41
3
+ size 3106606
search_data/Post-Impressionism _ Essay _ The Metropolitan Museum of Art _ Heilbrunn Timeline of Art History.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:838cebda5a22471967d460f6188beb7ced29c0c0f301caf8fa2b68a474f437d1
3
+ size 112676
search_data/Proton Mail_ Get a private, secure, and encrypted email account _ Proton.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7600547cebb3b26121c6456c95b04deb46a28e1f75d843d4a2c24764f05f425d
3
+ size 7216995
search_data/Scheduling emails and HIPAA compliance.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f120b3bbb190d853664a6cad818469bfd10b50d709fa9d9a3a5706953606a32
3
+ size 3679160
search_data/Security and Privacy in Outlook Web App - Microsoft Support.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75f8b44657e3f6513610690f2dfcbc86dbf31b4a005aa7ee6632a4a750513c8c
3
+ size 228545
search_data/Tree nut and peanut allergy _ healthdirect.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bdd079b0fec4f37a99ede3df4ba2be51156162bd2b03ca435dbb7bb8bde51d4
3
+ size 235370