Luca Foppiano commited on
Commit
9c5538b
2 Parent(s): ab9a153 01b5fcd

Merge pull request #25 from lfoppiano/review-interface

Browse files
Files changed (2) hide show
  1. README.md +9 -1
  2. streamlit_app.py +24 -17
README.md CHANGED
@@ -41,7 +41,7 @@ The conversation is kept in memory by a buffered sliding window memory (top 4 mo
41
  ## Getting started
42
 
43
  - Select the model+embedding combination you want to use
44
- - If using OpenAI, enter your API Key ([Open AI](https://platform.openai.com/account/api-keys) ~~or [Huggingface](https://huggingface.co/docs/hub/security-tokens)~~).
45
  - Upload a scientific article as a PDF document. You will see a spinner or loading indicator while the processing is in progress.
46
  - Once the spinner disappears, you can proceed to ask your questions
47
 
@@ -77,6 +77,14 @@ Error: `streamlit: Your system has an unsupported version of sqlite3. Chroma req
77
  Here the [solution on Linux](https://stackoverflow.com/questions/76958817/streamlit-your-system-has-an-unsupported-version-of-sqlite3-chroma-requires-sq).
78
  For more information, see the [details](https://docs.trychroma.com/troubleshooting#sqlite) on Chroma website.
79
 
 
 
 
 
 
 
 
 
80
  ## Development notes
81
 
82
  To release a new version:
 
41
  ## Getting started
42
 
43
  - Select the model+embedding combination you want to use
44
+ - If using gpt-3.5-turbo, gpt-4 or gpt-4-turbo, enter your API Key ([Open AI](https://platform.openai.com/account/api-keys)).
45
  - Upload a scientific article as a PDF document. You will see a spinner or loading indicator while the processing is in progress.
46
  - Once the spinner disappears, you can proceed to ask your questions
47
 
 
77
  Here the [solution on Linux](https://stackoverflow.com/questions/76958817/streamlit-your-system-has-an-unsupported-version-of-sqlite3-chroma-requires-sq).
78
  For more information, see the [details](https://docs.trychroma.com/troubleshooting#sqlite) on Chroma website.
79
 
80
+ ## Disclaimer on Data, Security, and Privacy ⚠️
81
+
82
+ Please read carefully:
83
+
84
+ - Avoid uploading sensitive data. We temporarily store text from the uploaded PDF documents only for processing your request, and we disclaim any responsibility for subsequent use or handling of the submitted data by third-party LLMs.
85
+ - Mistral and Zephyr are FREE to use and do not require any API key, but as we leverage the free API endpoint, there is no guarantee that all requests will go through. Use at your own risk.
86
+ - We do not assume responsibility for how the data is utilized by the LLM endpoint APIs.
87
+
88
  ## Development notes
89
 
90
  To release a new version:
streamlit_app.py CHANGED
@@ -19,6 +19,10 @@ from document_qa.document_qa_engine import DocumentQAEngine
19
  from document_qa.grobid_processors import GrobidAggregationProcessor, decorate_text_with_annotations
20
  from grobid_client_generic import GrobidClientGeneric
21
 
 
 
 
 
22
  if 'rqa' not in st.session_state:
23
  st.session_state['rqa'] = {}
24
 
@@ -117,17 +121,17 @@ def clear_memory():
117
  # @st.cache_resource
118
  def init_qa(model, api_key=None):
119
  ## For debug add: callbacks=[PromptLayerCallbackHandler(pl_tags=["langchain", "chatgpt", "document-qa"])])
120
- if model == 'chatgpt-3.5-turbo':
121
  st.session_state['memory'] = ConversationBufferWindowMemory(k=4)
122
  if api_key:
123
- chat = ChatOpenAI(model_name="gpt-3.5-turbo",
124
  temperature=0,
125
  openai_api_key=api_key,
126
  frequency_penalty=0.1)
127
  embeddings = OpenAIEmbeddings(openai_api_key=api_key)
128
 
129
  else:
130
- chat = ChatOpenAI(model_name="gpt-3.5-turbo",
131
  temperature=0,
132
  frequency_penalty=0.1)
133
  embeddings = OpenAIEmbeddings()
@@ -206,20 +210,23 @@ def play_old_messages():
206
  # is_api_key_provided = st.session_state['api_key']
207
 
208
  with st.sidebar:
209
- st.session_state['model'] = model = st.radio(
210
- "Model",
211
- ("chatgpt-3.5-turbo", "mistral-7b-instruct-v0.1", "zephyr-7b-beta"),
212
- index=2,
213
- captions=[
214
- "ChatGPT 3.5 Turbo + Ada-002-text (embeddings)",
215
- "Mistral-7B-Instruct-V0.1 + Sentence BERT (embeddings) :free:",
216
- "Zephyr-7B-beta + Sentence BERT (embeddings) :free:"
217
  ],
218
- help="Select the LLM model and embeddings you want to use.",
219
- disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded'])
 
 
 
220
 
221
  st.markdown(
222
- ":warning: Mistral and Zephyr are **FREE** to use. Requests might fail anytime. Use at your own risk. :warning: ")
223
 
224
  if (model == 'mistral-7b-instruct-v0.1' or model == 'zephyr-7b-beta') and model not in st.session_state['api_keys']:
225
  if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
@@ -238,7 +245,7 @@ with st.sidebar:
238
  # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
239
  st.session_state['rqa'][model] = init_qa(model)
240
 
241
- elif model == 'chatgpt-3.5-turbo' and model not in st.session_state['api_keys']:
242
  if 'OPENAI_API_KEY' not in os.environ:
243
  api_key = st.text_input('OpenAI API Key', type="password")
244
  st.markdown("Get it [here](https://platform.openai.com/account/api-keys)")
@@ -297,9 +304,9 @@ with st.sidebar:
297
  help="Number of chunks to consider when answering a question",
298
  disabled=not uploaded_file)
299
 
300
- st.session_state['ner_processing'] = st.checkbox("Named Entities Recognition (NER) processing on LLM response")
301
  st.markdown(
302
- '**NER on LLM responses**: The responses from the LLMs are post-processed to extract <span style="color:orange">physical quantities, measurements</span> and <span style="color:green">materials</span> mentions.',
303
  unsafe_allow_html=True)
304
 
305
  st.divider()
 
19
  from document_qa.grobid_processors import GrobidAggregationProcessor, decorate_text_with_annotations
20
  from grobid_client_generic import GrobidClientGeneric
21
 
22
+ OPENAI_MODELS = ['chatgpt-3.5-turbo',
23
+ "gpt-4",
24
+ "gpt-4-1106-preview"]
25
+
26
  if 'rqa' not in st.session_state:
27
  st.session_state['rqa'] = {}
28
 
 
121
  # @st.cache_resource
122
  def init_qa(model, api_key=None):
123
  ## For debug add: callbacks=[PromptLayerCallbackHandler(pl_tags=["langchain", "chatgpt", "document-qa"])])
124
+ if model in OPENAI_MODELS:
125
  st.session_state['memory'] = ConversationBufferWindowMemory(k=4)
126
  if api_key:
127
+ chat = ChatOpenAI(model_name=model,
128
  temperature=0,
129
  openai_api_key=api_key,
130
  frequency_penalty=0.1)
131
  embeddings = OpenAIEmbeddings(openai_api_key=api_key)
132
 
133
  else:
134
+ chat = ChatOpenAI(model_name=model,
135
  temperature=0,
136
  frequency_penalty=0.1)
137
  embeddings = OpenAIEmbeddings()
 
210
  # is_api_key_provided = st.session_state['api_key']
211
 
212
  with st.sidebar:
213
+ st.session_state['model'] = model = st.selectbox(
214
+ "Model:",
215
+ options=[
216
+ "chatgpt-3.5-turbo",
217
+ "mistral-7b-instruct-v0.1",
218
+ "zephyr-7b-beta",
219
+ "gpt-4",
220
+ "gpt-4-1106-preview"
221
  ],
222
+ index=2,
223
+ placeholder="Select model",
224
+ help="Select the LLM model:",
225
+ disabled=st.session_state['doc_id'] is not None or st.session_state['uploaded']
226
+ )
227
 
228
  st.markdown(
229
+ ":warning: [Usage disclaimer](https://github.com/lfoppiano/document-qa/tree/review-interface#disclaimer-on-data-security-and-privacy-%EF%B8%8F) :warning: ")
230
 
231
  if (model == 'mistral-7b-instruct-v0.1' or model == 'zephyr-7b-beta') and model not in st.session_state['api_keys']:
232
  if 'HUGGINGFACEHUB_API_TOKEN' not in os.environ:
 
245
  # os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
246
  st.session_state['rqa'][model] = init_qa(model)
247
 
248
+ elif model in OPENAI_MODELS and model not in st.session_state['api_keys']:
249
  if 'OPENAI_API_KEY' not in os.environ:
250
  api_key = st.text_input('OpenAI API Key', type="password")
251
  st.markdown("Get it [here](https://platform.openai.com/account/api-keys)")
 
304
  help="Number of chunks to consider when answering a question",
305
  disabled=not uploaded_file)
306
 
307
+ st.session_state['ner_processing'] = st.checkbox("Identify materials and properties.")
308
  st.markdown(
309
+ 'The LLM responses undergo post-processing to extract <span style="color:orange">physical quantities, measurements</span>, and <span style="color:green">materials</span> mentions.',
310
  unsafe_allow_html=True)
311
 
312
  st.divider()