arslan-ahmed committed on
Commit
1b37f68
·
1 Parent(s): c138c11

added Watsonx models

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. app.py +94 -45
  3. requirements.txt +3 -1
  4. ttyd_consts.py +6 -4
  5. ttyd_functions.py +40 -20
README.md CHANGED
@@ -29,7 +29,7 @@ You can develop and deploy your own personal chatbot (similar to https://hugging
29
 
30
  docker pull arslan2k12/ttyd_base (https://hub.docker.com/r/arslan2k12/ttyd_base) <br/>
31
  docker pull arslan2k12/arslanbot (https://hub.docker.com/r/arslan2k12/arslanbot)<br/>
32
- docker run --rm -d -p 7860:7860 --env-file ./.env arslan2k12/arslanbot
33
 
34
 
35
  Contents of `.env` file:
 
29
 
30
  docker pull arslan2k12/ttyd_base (https://hub.docker.com/r/arslan2k12/ttyd_base) <br/>
31
  docker pull arslan2k12/arslanbot (https://hub.docker.com/r/arslan2k12/arslanbot)<br/>
32
+ docker run --rm -d -p 7860:7860 --env-file ./.env arslan2k12/ttyd_arslanbot
33
 
34
 
35
  Contents of `.env` file:
app.py CHANGED
@@ -9,6 +9,7 @@ from langchain.vectorstores import Chroma
9
  from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
10
  from langchain.chains import ConversationalRetrievalChain
11
  from langchain.chains import RetrievalQA
 
12
 
13
  import os
14
  from langchain.chat_models import ChatOpenAI
@@ -16,6 +17,12 @@ from langchain import OpenAI
16
  from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader
17
  from whatsapp_chat_custom import WhatsAppChatLoader # use this instead of from langchain.document_loaders import WhatsAppChatLoader
18
 
 
 
 
 
 
 
19
  from collections import deque
20
  import re
21
  from bs4 import BeautifulSoup
@@ -31,7 +38,7 @@ from ttyd_consts import *
31
 
32
  load_dotenv()
33
 
34
- # select the mode at runtime when starting container - modes options are in ttyd_consts.py
35
  if (os.getenv("TTYD_MODE",'')).split('_')[0]=='personalBot':
36
  mode = mode_arslan
37
  gDriveUrl = (os.getenv("GDRIVE_FOLDER_URL",'')).replace('?usp=sharing','')
@@ -48,8 +55,8 @@ else:
48
 
49
 
50
  if mode.type!='userInputDocs':
51
- # local vector store as opposed to gradio state vector store
52
- vsDict_hard = localData_vecStore(os.getenv("OPENAI_API_KEY"), inputDir=mode.inputDir, file_list=mode.file_list, url_list=mode.url_list)
53
 
54
  ###############################################################################################
55
 
@@ -57,30 +64,27 @@ if mode.type!='userInputDocs':
57
 
58
  ###############################################################################################
59
 
60
- # initialize chatbot function sets the QA Chain, and also sets/updates any other components to start chatting. updateQaChain function only updates QA chain and will be called whenever Adv Settings are updated.
61
- def initializeChatbot(temp, k, modelName, stdlQs, api_key_st, vsDict_st, progress=gr.Progress()):
62
- progress(0.1, waitText_initialize)
63
- qa_chain_st = updateQaChain(temp, k, modelName, stdlQs, api_key_st, vsDict_st)
64
- progress(0.5, waitText_initialize)
65
- #generate welcome message
66
- if mode.welcomeMsg:
67
- welMsg = mode.welcomeMsg
68
- else:
69
- welMsg = qa_chain_st({'question': initialize_prompt, 'chat_history':[]})['answer']
70
- print('Chatbot initialized at ', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
71
-
72
- return qa_chain_st, btn.update(interactive=True), initChatbot_btn.update('Chatbot ready. Now visit the chatbot Tab.', interactive=False)\
73
- , oaiKey_tb.update(), gr.Tabs.update(selected='cb'), chatbot.update(value=[('', welMsg)])
74
-
75
-
76
  def setOaiApiKey(api_key):
77
- api_key = transformApi(api_key)
 
 
 
 
 
 
 
 
 
 
 
 
78
  try:
79
- openai.Model.list(api_key=api_key) # test the API key
 
80
  api_key_st = api_key
81
- return oaiKey_tb.update('API Key accepted', interactive=False, type='text'), oaiKey_btn.update(interactive=False), api_key_st
82
  except Exception as e:
83
- return oaiKey_tb.update(str(e), type='text'), *[x.update() for x in [oaiKey_btn, api_key_state]]
84
 
85
  # convert user uploaded data to vectorstore
86
  def uiData_vecStore(userFiles, userUrls, api_key_st, vsDict_st={}, progress=gr.Progress()):
@@ -103,8 +107,7 @@ def uiData_vecStore(userFiles, userUrls, api_key_st, vsDict_st={}, progress=gr.P
103
  docs = split_docs(documents)
104
  # Embeddings
105
  try:
106
- openai.Model.list(api_key=api_key_st) # test the API key
107
- embeddings = OpenAIEmbeddings(openai_api_key=api_key_st)
108
  except Exception as e:
109
  return {}, str(e), *[x.update() for x in opComponents]
110
 
@@ -117,18 +120,57 @@ def uiData_vecStore(userFiles, userUrls, api_key_st, vsDict_st={}, progress=gr.P
117
  progress(1, 'Data loaded')
118
  return vsDict_st, src_str, *[x.update(interactive=False) for x in [data_ingest_btn, upload_fb]], urls_tb.update(interactive=False, placeholder='')
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  # just update the QA Chain, no updates to any UI
121
- def updateQaChain(temp, k, modelName, stdlQs, api_key_st, vsDict_st):
122
  # if we are not adding data from ui, then use vsDict_hard as vectorstore
123
  if vsDict_st=={} and mode.type!='userInputDocs': vsDict_st=vsDict_hard
124
- modelName = modelName.split('(')[0].strip() # so we can provide any info in brackets
125
- # check if the input model is chat model or legacy model
126
- try:
127
- ChatOpenAI(openai_api_key=api_key_st, temperature=0,model_name=modelName,max_tokens=1).predict('')
128
- llm = ChatOpenAI(openai_api_key=api_key_st, temperature=float(temp),model_name=modelName)
129
- except:
130
- OpenAI(openai_api_key=api_key_st, temperature=0,model_name=modelName,max_tokens=1).predict('')
131
- llm = OpenAI(openai_api_key=api_key_st, temperature=float(temp),model_name=modelName)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  # settingsUpdated = 'Settings updated:'+ ' Model=' + modelName + ', Temp=' + str(temp)+ ', k=' + str(k)
133
  # gr.Info(settingsUpdated)
134
 
@@ -150,7 +192,7 @@ def updateQaChain(temp, k, modelName, stdlQs, api_key_st, vsDict_st):
150
  return_generated_question=True
151
  )
152
 
153
- return qa_chain_st
154
 
155
 
156
  def respond(message, chat_history, qa_chain):
@@ -172,7 +214,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
172
  # Initialize state variables - stored in this browser session - these can only be used within input or output of .click/.submit etc, not as a python var coz they are not stored in backend, only as a frontend gradio component
173
  # but if you initialize it with a default value, that value will be stored in backend and accessible across all users. You can also change it with statear.value='newValue'
174
  qa_state = gr.State()
175
- api_key_state = gr.State(getPersonalBotApiKey() if mode.type=='personalBot' else 'Null') # can be string (OpenAI) or dict (WX)
176
  chromaVS_state = gr.State({})
177
 
178
 
@@ -183,9 +225,14 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
183
  with gr.Row():
184
  with gr.Column():
185
  oaiKey_tb = gr.Textbox(label="OpenAI API Key", type='password'\
186
- , info='You can find OpenAI API key at https://platform.openai.com/account/api-keys'\
187
- , placeholder='Enter your API key here and hit enter to begin chatting')
188
- oaiKey_btn = gr.Button("Submit API Key")
 
 
 
 
 
189
  with gr.Row(visible=mode.uiAddDataVis):
190
  upload_fb = gr.Files(scale=5, label="Upload (multiple) Files - pdf/txt/docx supported", file_types=['.doc', '.docx', 'text', '.pdf', '.csv'])
191
  urls_tb = gr.Textbox(scale=5, label="Enter URLs starting with https (comma separated)"\
@@ -203,8 +250,6 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
203
  with gr.Row():
204
  btn = gr.Button("Send Message", interactive=False, variant="primary")
205
  clear = gr.ClearButton(components=[msg, chatbot, srcDocs], value="Clear chat history")
206
- # exp_comp = gr.Dataset(scale=0.7, samples=[['123'],['456'], ['123'],['456'],['456']], components=[msg], label='Examples (auto generated by LLM)', visible=False)
207
- # gr.Examples(examples=exps, inputs=msg)
208
  with gr.Accordion("Advance Settings - click to expand", open=False):
209
  with gr.Row():
210
  with gr.Column():
@@ -220,23 +265,27 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
220
 
221
  ### Setup the Gradio Event Listeners
222
 
223
- # API button
224
- oaiKey_btn_args = {'fn':setOaiApiKey, 'inputs':[oaiKey_tb], 'outputs':[oaiKey_tb, oaiKey_btn, api_key_state]}
225
  oaiKey_btn.click(**oaiKey_btn_args)
226
  oaiKey_tb.submit(**oaiKey_btn_args)
227
 
 
 
 
 
228
  # Data Ingest Button
229
  data_ingest_event = data_ingest_btn.click(uiData_vecStore, [upload_fb, urls_tb, api_key_state, chromaVS_state], [chromaVS_state, status_tb, data_ingest_btn, upload_fb, urls_tb])
230
 
231
  # Adv Settings
232
- advSet_args = {'fn':updateQaChain, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state]}
233
  temp_sld.release(**advSet_args)
234
  k_sld.release(**advSet_args)
235
  model_dd.change(**advSet_args)
236
  stdlQs_rb.change(**advSet_args)
237
 
238
  # Initialize button
239
- initCb_args = {'fn':initializeChatbot, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state, btn, initChatbot_btn, oaiKey_tb, tabs, chatbot]}
240
  if mode.type=='personalBot':
241
  demo.load(**initCb_args) # load Chatbot UI directly on startup
242
  initChatbot_btn.click(**initCb_args)
 
9
  from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
10
  from langchain.chains import ConversationalRetrievalChain
11
  from langchain.chains import RetrievalQA
12
+ from langchain.embeddings import SentenceTransformerEmbeddings
13
 
14
  import os
15
  from langchain.chat_models import ChatOpenAI
 
17
  from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader
18
  from whatsapp_chat_custom import WhatsAppChatLoader # use this instead of from langchain.document_loaders import WhatsAppChatLoader
19
 
20
+ from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
21
+ from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams
22
+ from ibm_watson_machine_learning.foundation_models.utils.enums import DecodingMethods
23
+ from ibm_watson_machine_learning.foundation_models import Model
24
+ from ibm_watson_machine_learning.foundation_models.extensions.langchain import WatsonxLLM
25
+
26
  from collections import deque
27
  import re
28
  from bs4 import BeautifulSoup
 
38
 
39
  load_dotenv()
40
 
41
+ # select the mode when starting the container - mode options are in ttyd_consts.py
42
  if (os.getenv("TTYD_MODE",'')).split('_')[0]=='personalBot':
43
  mode = mode_arslan
44
  gDriveUrl = (os.getenv("GDRIVE_FOLDER_URL",'')).replace('?usp=sharing','')
 
55
 
56
 
57
  if mode.type!='userInputDocs':
58
+ # local vector store (as opposed to the gradio state vector store), used when the user is not uploading the docs
59
+ vsDict_hard = localData_vecStore(getPersonalBotApiKey(), inputDir=mode.inputDir, file_list=mode.file_list, url_list=mode.url_list)
60
 
61
  ###############################################################################################
62
 
 
64
 
65
  ###############################################################################################
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
def setOaiApiKey(api_key):
    """Check an OpenAI API key typed into the UI and, if accepted, keep it as session credentials.

    Returns one value per output wired to this handler:
    oaiKey_tb, oaiKey_btn, wxKey_tb, wxPid_tb, wxKey_btn, api_key_state.
    On failure the error text is shown in the OpenAI key textbox and every
    other output is left as it was.
    """
    otherCredComps = [oaiKey_btn, wxKey_tb, wxPid_tb, wxKey_btn]
    oaiCreds = getOaiCreds(api_key)
    try:
        # listing the models is only a probe: it raises if the key is rejected
        openai.Model.list(api_key=oaiCreds.get('oai_key', 'Null'))
        acceptedBox = oaiKey_tb.update('API Key accepted', interactive=False, type='text')
        lockedComps = [comp.update(interactive=False) for comp in otherCredComps]
        return acceptedBox, *lockedComps, oaiCreds
    except Exception as err:
        errorBox = oaiKey_tb.update(str(err), type='text')
        untouched = [comp.update() for comp in otherCredComps + [api_key_state]]
        return errorBox, *untouched
76
+
77
+
78
def setWxApiKey(key, p_id):
    """Check Watsonx credentials typed into the UI and, if accepted, keep them as session credentials.

    Args:
        key: IBM Cloud API key from the Watsonx key textbox.
        p_id: Watsonx project id from the project-id textbox.

    Returns one value per output wired to this handler:
    wxKey_tb, wxPid_tb, wxKey_btn, oaiKey_tb, oaiKey_btn, api_key_state.
    On failure the error text is shown in both Watsonx textboxes and every
    other output is left as it was.
    """
    credComps = [wxKey_btn, oaiKey_tb, oaiKey_btn]
    wxBoxes = [wxKey_tb, wxPid_tb]
    api_key = getWxCreds(key, p_id)
    try:
        # getWxCreds returns {} when the key or project id is missing; fail with a
        # readable message instead of the bare KeyError text "'credentials'"
        if not api_key:
            raise ValueError('Error: Invalid or None Credentials')
        # building a model object is only a probe to test the API key
        Model(model_id=ModelTypes.FLAN_UL2, credentials=api_key['credentials'], project_id=api_key['project_id'])
        return *[x.update('Watsonx credentials accepted', interactive=False, type='text') for x in wxBoxes], *[x.update(interactive=False) for x in credComps], api_key
    except Exception as e:
        return *[x.update(str(e), type='text') for x in wxBoxes], *[x.update() for x in credComps+[api_key_state]]
88
 
89
  # convert user uploaded data to vectorstore
90
  def uiData_vecStore(userFiles, userUrls, api_key_st, vsDict_st={}, progress=gr.Progress()):
 
107
  docs = split_docs(documents)
108
  # Embeddings
109
  try:
110
+ embeddings = getEmbeddingFunc(api_key_st)
 
111
  except Exception as e:
112
  return {}, str(e), *[x.update() for x in opComponents]
113
 
 
120
  progress(1, 'Data loaded')
121
  return vsDict_st, src_str, *[x.update(interactive=False) for x in [data_ingest_btn, upload_fb]], urls_tb.update(interactive=False, placeholder='')
122
 
123
+ # initialize chatbot function sets the QA Chain, and also sets/updates any other components to start chatting. updateQaChain function only updates QA chain and will be called whenever Adv Settings are updated.
124
def initializeChatbot(temp, k, modelName, stdlQs, api_key_st, vsDict_st, progress=gr.Progress()):
    """Build the QA chain and switch the UI into chat mode.

    Delegates chain construction to updateQaChain, then produces the first
    chatbot message: the mode's fixed welcome message when one is set,
    otherwise the chain's answer to initialize_prompt.

    Returns one value per output wired to this handler:
    qa_state, model_dd, btn, initChatbot_btn, oaiKey_tb, tabs, chatbot.
    """
    progress(0.1, waitText_initialize)
    qa_chain_st, modelDdUpdate = updateQaChain(temp, k, modelName, stdlQs, api_key_st, vsDict_st)
    progress(0.5, waitText_initialize)

    # generate welcome message
    welMsg = mode.welcomeMsg
    if not welMsg:
        welMsg = qa_chain_st({'question': initialize_prompt, 'chat_history':[]})['answer']
    print('Chatbot initialized at ', datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    return (
        qa_chain_st,
        modelDdUpdate,
        btn.update(interactive=True),
        initChatbot_btn.update('Chatbot ready. Now visit the chatbot Tab.', interactive=False),
        oaiKey_tb.update(),
        gr.Tabs.update(selected='cb'),
        chatbot.update(value=[('', welMsg)]),
    )
138
+
139
  # just update the QA Chain, no updates to any UI
140
+ def updateQaChain(temp, k, modelNameDD, stdlQs, api_key_st, vsDict_st):
141
  # if we are not adding data from ui, then use vsDict_hard as vectorstore
142
  if vsDict_st=={} and mode.type!='userInputDocs': vsDict_st=vsDict_hard
143
+
144
+ if api_key_st.get('service')=='openai':
145
+ if not 'openai' in modelNameDD:
146
+ modelNameDD = 'gpt-3.5-turbo (openai)' # default model for openai
147
+ modelName = modelNameDD.split('(')[0].strip()
148
+ # check if the input model is chat model or legacy model
149
+ try:
150
+ ChatOpenAI(openai_api_key=api_key_st.get('oai_key','Null'), temperature=0,model_name=modelName,max_tokens=1).predict('')
151
+ llm = ChatOpenAI(openai_api_key=api_key_st.get('oai_key','Null'), temperature=float(temp),model_name=modelName)
152
+ except:
153
+ OpenAI(openai_api_key=api_key_st.get('oai_key','Null'), temperature=0,model_name=modelName,max_tokens=1).predict('')
154
+ llm = OpenAI(openai_api_key=api_key_st.get('oai_key','Null'), temperature=float(temp),model_name=modelName)
155
+ elif api_key_st.get('service')=='watsonx':
156
+ if not 'watsonx' in modelNameDD:
157
+ modelNameDD = 'meta-llama/llama-2-70b-chat (watsonx)' # default model for watsonx
158
+ modelName = modelNameDD.split('(')[0].strip()
159
+ wxModelParams = {
160
+ GenParams.DECODING_METHOD: DecodingMethods.SAMPLE,
161
+ GenParams.MAX_NEW_TOKENS: 1000,
162
+ GenParams.MIN_NEW_TOKENS: 1,
163
+ GenParams.TEMPERATURE: float(temp),
164
+ GenParams.TOP_K: 50,
165
+ GenParams.TOP_P: 1
166
+ }
167
+ flan_ul2_model = Model(
168
+ model_id=modelName,
169
+ params=wxModelParams,
170
+ credentials=api_key_st['credentials'], project_id=api_key_st['project_id'])
171
+ llm = WatsonxLLM(model=flan_ul2_model)
172
+ else:
173
+ raise Exception('Error: Invalid or None Credentials')
174
  # settingsUpdated = 'Settings updated:'+ ' Model=' + modelName + ', Temp=' + str(temp)+ ', k=' + str(k)
175
  # gr.Info(settingsUpdated)
176
 
 
192
  return_generated_question=True
193
  )
194
 
195
+ return qa_chain_st, model_dd.update(value=modelNameDD)
196
 
197
 
198
  def respond(message, chat_history, qa_chain):
 
214
  # Initialize state variables - stored in this browser session - these can only be used within input or output of .click/.submit etc, not as a python var coz they are not stored in backend, only as a frontend gradio component
215
  # but if you initialize it with a default value, that value will be stored in backend and accessible across all users. You can also change it with statear.value='newValue'
216
  qa_state = gr.State()
217
+ api_key_state = gr.State(getPersonalBotApiKey() if mode.type=='personalBot' else {}) # can be string (OpenAI) or dict (WX)
218
  chromaVS_state = gr.State({})
219
 
220
 
 
225
  with gr.Row():
226
  with gr.Column():
227
  oaiKey_tb = gr.Textbox(label="OpenAI API Key", type='password'\
228
+ , info='You can find OpenAI API key at https://platform.openai.com/account/api-keys')
229
+ oaiKey_btn = gr.Button("Submit OpenAI API Key")
230
+ with gr.Column():
231
+ wxKey_tb = gr.Textbox(label="Watsonx API Key", type='password'\
232
+ , info='You can find IBM Cloud API Key at Manage > Access (IAM) > API keys on https://cloud.ibm.com/iam/overview')
233
+ wxPid_tb = gr.Textbox(label="Watsonx Project ID"\
234
+ , info='You can find Project ID at Project -> Manage -> General -> Details on https://dataplatform.cloud.ibm.com/wx/home')
235
+ wxKey_btn = gr.Button("Submit Watsonx Credentials")
236
  with gr.Row(visible=mode.uiAddDataVis):
237
  upload_fb = gr.Files(scale=5, label="Upload (multiple) Files - pdf/txt/docx supported", file_types=['.doc', '.docx', 'text', '.pdf', '.csv'])
238
  urls_tb = gr.Textbox(scale=5, label="Enter URLs starting with https (comma separated)"\
 
250
  with gr.Row():
251
  btn = gr.Button("Send Message", interactive=False, variant="primary")
252
  clear = gr.ClearButton(components=[msg, chatbot, srcDocs], value="Clear chat history")
 
 
253
  with gr.Accordion("Advance Settings - click to expand", open=False):
254
  with gr.Row():
255
  with gr.Column():
 
265
 
266
  ### Setup the Gradio Event Listeners
267
 
268
+ # OpenAI API button
269
+ oaiKey_btn_args = {'fn':setOaiApiKey, 'inputs':[oaiKey_tb], 'outputs':[oaiKey_tb, oaiKey_btn, wxKey_tb, wxPid_tb, wxKey_btn, api_key_state]}
270
  oaiKey_btn.click(**oaiKey_btn_args)
271
  oaiKey_tb.submit(**oaiKey_btn_args)
272
 
273
+ # Watsonx Creds button
274
+ wxKey_btn_args = {'fn':setWxApiKey, 'inputs':[wxKey_tb, wxPid_tb], 'outputs':[wxKey_tb, wxPid_tb, wxKey_btn, oaiKey_tb, oaiKey_btn, api_key_state]}
275
+ wxKey_btn.click(**wxKey_btn_args)
276
+
277
  # Data Ingest Button
278
  data_ingest_event = data_ingest_btn.click(uiData_vecStore, [upload_fb, urls_tb, api_key_state, chromaVS_state], [chromaVS_state, status_tb, data_ingest_btn, upload_fb, urls_tb])
279
 
280
  # Adv Settings
281
+ advSet_args = {'fn':updateQaChain, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state, model_dd]}
282
  temp_sld.release(**advSet_args)
283
  k_sld.release(**advSet_args)
284
  model_dd.change(**advSet_args)
285
  stdlQs_rb.change(**advSet_args)
286
 
287
  # Initialize button
288
+ initCb_args = {'fn':initializeChatbot, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state, model_dd, btn, initChatbot_btn, oaiKey_tb, tabs, chatbot]}
289
  if mode.type=='personalBot':
290
  demo.load(**initCb_args) # load Chatbot UI directly on startup
291
  initChatbot_btn.click(**initCb_args)
requirements.txt CHANGED
@@ -7,4 +7,6 @@ pypdf
7
  gradio
8
  PyMuPDF
9
  gdown
10
- docx2txt
 
 
 
7
  gradio
8
  PyMuPDF
9
  gdown
10
+ docx2txt
11
+ sentence-transformers
12
+ ibm-watson-machine-learning
ttyd_consts.py CHANGED
@@ -1,10 +1,12 @@
 
 
1
  exp_query = 'Generate top 5 questions that I can ask about this data. Questions should be very precise and short, ideally less than 10 words.'
2
 
3
  waitText_initialize = 'Preparing the documents, please wait...'
4
 
5
- initialize_prompt = 'Write a short welcome message to the user. Describe the data with a comprehensive overview including short summary.\
6
  If this data is about a person, mention his name instead of using pronouns. After describing the overview, you should mention top 3 example questions that the user can ask about this data.\
7
- \n\nYour response should be short and precise. Format of your response should be Summary:\n{Description and Summary} \n\n Example Questions:\n{Example Questions}'
8
 
9
  nustian_exps = ['Tell me about NUSTIAN',
10
  'Who is the NUSTIAN regional lead for Silicon Valley?',
@@ -24,7 +26,7 @@ stdlQs_rb_choices = ['Retrieve relavant docs using original question, send orig
24
 
25
  model_dd_info = 'You can also input any OpenAI model name, compatible with /v1/completions or /v1/chat/completions endpoint. Details: https://platform.openai.com/docs/models/'
26
 
27
- model_dd_choices = ['gpt-3.5-turbo', 'gpt-3.5-turbo-16k', 'gpt-4', 'text-davinci-003 (Legacy)', 'text-curie-001 (Legacy)', 'babbage-002']
28
 
29
  url_tb_info = 'Upto 100 domain webpages will be crawled for each URL. You can also enter online PDF files.'
30
 
@@ -33,7 +35,7 @@ url_tb_ph = 'https://example.com, https://another.com, https://anyremotedocument
33
 
34
  md_title_general = """
35
  ## Chat with your documents and websites<br>
36
- Step 1) Enter your OpenAI API Key, and click Submit.<br>
37
  Step 2) Upload your documents and/or enter URLs, then click Load Data.<br>
38
  Step 3) Once data is loaded, click Initialize Chatbot (at the bottom of the page) to start talking to your data.<br>
39
 
 
1
+ from ibm_watson_machine_learning.foundation_models.utils.enums import ModelTypes
2
+
3
  exp_query = 'Generate top 5 questions that I can ask about this data. Questions should be very precise and short, ideally less than 10 words.'
4
 
5
  waitText_initialize = 'Preparing the documents, please wait...'
6
 
7
+ initialize_prompt = """Write a short welcome message to the user. Describe the data with a comprehensive overview including short summary.\
8
  If this data is about a person, mention his name instead of using pronouns. After describing the overview, you should mention top 3 example questions that the user can ask about this data.\
9
+ \n\nYour response should be short and precise. Format of your response should be Summary:\n{Description and Summary} \n\n Example Questions:\n{Example Questions}"""
10
 
11
  nustian_exps = ['Tell me about NUSTIAN',
12
  'Who is the NUSTIAN regional lead for Silicon Valley?',
 
26
 
27
  model_dd_info = 'You can also input any OpenAI model name, compatible with /v1/completions or /v1/chat/completions endpoint. Details: https://platform.openai.com/docs/models/'
28
 
29
+ model_dd_choices = ['gpt-3.5-turbo (openai)', 'gpt-3.5-turbo-16k (openai)', 'gpt-4 (openai)', 'text-davinci-003 (Legacy - openai)', 'text-curie-001 (Legacy - openai)', 'babbage-002 (openai)'] + [model.value+' (watsonx)' for model in ModelTypes]
30
 
31
  url_tb_info = 'Upto 100 domain webpages will be crawled for each URL. You can also enter online PDF files.'
32
 
 
35
 
36
  md_title_general = """
37
  ## Chat with your documents and websites<br>
38
+ Step 1) Enter your OpenAI API or Watsonx Credentials, and click Submit.<br>
39
  Step 2) Upload your documents and/or enter URLs, then click Load Data.<br>
40
  Step 3) Once data is loaded, click Initialize Chatbot (at the bottom of the page) to start talking to your data.<br>
41
 
ttyd_functions.py CHANGED
@@ -1,9 +1,11 @@
1
 
2
  import datetime
3
  import uuid
 
4
  from langchain.embeddings import OpenAIEmbeddings
5
  from langchain.vectorstores import Chroma
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
7
 
8
  import os
9
  from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader
@@ -25,15 +27,31 @@ mimetypes.init()
25
  media_files = tuple([x for x in mimetypes.types_map if mimetypes.types_map[x].split('/')[0] in ['image', 'video', 'audio']])
26
  filter_strings = ['/email-protection#']
27
 
28
-
29
- def transformApi(api_key=''):
30
- if api_key==os.getenv("TEMP_PWD"):
31
- return os.getenv("OPENAI_API_KEY")
32
- elif api_key is None or api_key=='':
33
- return 'Null'
 
 
 
 
 
 
 
 
34
  else:
35
- return api_key
36
 
 
 
 
 
 
 
 
 
37
  def get_hyperlinks(url):
38
  try:
39
  reqs = requests.get(url)
@@ -226,6 +244,18 @@ def getSourcesFromMetadata(metadata, sourceOnly=True, sepFileUrl=True):
226
  src_docs = '\n'.join(([f"{i+1}) {x}" for i,x in enumerate(sorted(list(setSrc), key=str.casefold))]))
227
  return src_docs, len(setSrc)
228
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
  def getVsDict(embeddingFunc, docs, vsDict={}):
231
  # create chroma client if doesnt exist
@@ -241,13 +271,13 @@ def getVsDict(embeddingFunc, docs, vsDict={}):
241
  return vsDict
242
 
243
  # used for hardcoded documents only - not uploaded by user (uiData_vecStore is a separate function for that)
244
- def localData_vecStore(openApiKey=None, inputDir=None, file_list=[], url_list=[], vsDict={}):
245
  documents = data_ingestion(inputDir, file_list, url_list)
246
  if not documents:
247
- return {}
248
  docs = split_docs(documents)
249
  # Embeddings
250
- embeddings = OpenAIEmbeddings(openai_api_key=openApiKey)
251
  # create chroma client if doesnt exist
252
  vsDict_hd = getVsDict(embeddings, docs, vsDict)
253
  # get sources from metadata
@@ -263,13 +293,3 @@ def num_tokens_from_string(string, encoding_name = "cl100k_base"):
263
  num_tokens = len(encoding.encode(string))
264
  return num_tokens
265
 
266
- def getPersonalBotApiKey():
267
- if os.getenv("OPENAI_API_KEY"):
268
- return os.getenv("OPENAI_API_KEY")
269
- elif os.getenv("WX_API_KEY"):
270
- wxCreds = {'credentials' : {"url": "https://us-south.ml.cloud.ibm.com", "apikey": os.getenv("WX_API_KEY") },
271
- 'project_id': os.getenv("WX_PROJECT_ID")
272
- }
273
- return wxCreds
274
- else:
275
- return None
 
1
 
2
  import datetime
3
  import uuid
4
+ import openai
5
  from langchain.embeddings import OpenAIEmbeddings
6
  from langchain.vectorstores import Chroma
7
  from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.embeddings import SentenceTransformerEmbeddings
9
 
10
  import os
11
  from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader
 
27
  media_files = tuple([x for x in mimetypes.types_map if mimetypes.types_map[x].split('/')[0] in ['image', 'video', 'audio']])
28
  filter_strings = ['/email-protection#']
29
 
30
def getOaiCreds(key):
    """Wrap an OpenAI API key in the credentials dict used throughout the app.

    Args:
        key: the API key string; may be None or empty.

    Returns:
        {'service': 'openai', 'oai_key': <key>} for a usable key, or {} when
        the key is missing, empty, or whitespace only.
    """
    # keys pasted into the UI or a .env file often carry stray spaces/newlines
    if isinstance(key, str):
        key = key.strip()
    if not key:
        return {}
    return {'service': 'openai',
            'oai_key': key
            }
37
+
38
def getWxCreds(key, p_id, url="https://us-south.ml.cloud.ibm.com"):
    """Wrap Watsonx credentials in the credentials dict used throughout the app.

    Args:
        key: IBM Cloud API key; may be None or empty.
        p_id: Watsonx project id; may be None or empty.
        url: Watsonx service endpoint. Defaults to the us-south endpoint the
            app has always used; pass another regional endpoint if needed.

    Returns:
        {'service': 'watsonx', 'credentials': {'url', 'apikey'}, 'project_id'}
        when both key and project id are usable, otherwise {}.
    """
    # values pasted into the UI or a .env file often carry stray spaces/newlines
    if isinstance(key, str):
        key = key.strip()
    if isinstance(p_id, str):
        p_id = p_id.strip()
    if not (key and p_id):
        return {}
    return {'service': 'watsonx',
            'credentials': {"url": url, "apikey": key},
            'project_id': p_id
            }
46
 
47
def getPersonalBotApiKey():
    """Build the credentials dict from environment variables.

    OPENAI_API_KEY takes precedence; otherwise WX_API_KEY together with
    WX_PROJECT_ID is used. Returns {} when neither is configured.
    """
    oaiKey = os.getenv("OPENAI_API_KEY")
    if oaiKey:
        return getOaiCreds(oaiKey)

    wxKey = os.getenv("WX_API_KEY")
    wxPid = os.getenv("WX_PROJECT_ID")
    if wxKey and wxPid:
        return getWxCreds(wxKey, wxPid)

    return {}
54
+
55
  def get_hyperlinks(url):
56
  try:
57
  reqs = requests.get(url)
 
244
  src_docs = '\n'.join(([f"{i+1}) {x}" for i,x in enumerate(sorted(list(setSrc), key=str.casefold))]))
245
  return src_docs, len(setSrc)
246
 
247
def getEmbeddingFunc(creds):
    """Return the embedding function for the service named in *creds*.

    Args:
        creds: credentials dict carrying a 'service' entry of 'openai' or
            'watsonx' (the shape produced by getOaiCreds / getWxCreds).

    Returns:
        OpenAIEmbeddings for 'openai'; SentenceTransformerEmbeddings
        ("all-MiniLM-L6-v2") for 'watsonx'.

    Raises:
        ValueError: if creds is None, not a dict, or names no known service.
    """
    # anything that is not a dict (None, a bare key string, ...) is treated as missing credentials
    service = creds.get('service') if isinstance(creds, dict) else None
    # OpenAI key used
    if service == 'openai':
        return OpenAIEmbeddings(openai_api_key=creds.get('oai_key', 'Null'))
    # WX key used
    if service == 'watsonx':
        # for now use an open-source model for embedding, as WX doesn't have any embedding model
        return SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    raise ValueError('Error: Invalid or None Credentials')
259
 
260
  def getVsDict(embeddingFunc, docs, vsDict={}):
261
  # create chroma client if doesnt exist
 
271
  return vsDict
272
 
273
  # used for hardcoded documents only - not uploaded by user (uiData_vecStore is a separate function for that)
274
+ def localData_vecStore(embKey={}, inputDir=None, file_list=[], url_list=[], vsDict={}):
275
  documents = data_ingestion(inputDir, file_list, url_list)
276
  if not documents:
277
+ raise Exception('Error: No Documents Found')
278
  docs = split_docs(documents)
279
  # Embeddings
280
+ embeddings = getEmbeddingFunc(embKey)
281
  # create chroma client if doesnt exist
282
  vsDict_hd = getVsDict(embeddings, docs, vsDict)
283
  # get sources from metadata
 
293
  num_tokens = len(encoding.encode(string))
294
  return num_tokens
295