profoz committed on
Commit
1fef6a8
·
1 Parent(s): 046385a
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +5 -3
.gitignore CHANGED
@@ -1,2 +1,3 @@
 
1
  .DS_Store
2
  .streamlit/
 
1
+ .idea/
2
  .DS_Store
3
  .streamlit/
app.py CHANGED
@@ -9,7 +9,7 @@ import openai
9
 
10
  all_documents = {}
11
 
12
- def qa_gpt3(question, context):
13
  openai.api_key = st.secrets["openai_key"]
14
 
15
  response = openai.Completion.create(
@@ -37,7 +37,7 @@ value='https://www.databricks.com/blog/2022/11/15/values-define-databricks-cultu
37
  query = st.text_input("Query")
38
 
39
  qa_option = st.selectbox('Q/A Answerer', ('gpt3', 'a-ware/bart-squadv2'))
40
- tokenizing = st.selectbox('How to Tokenize', ("Don't (use entire body as document)", 'Newline (split by newline character)'))
41
 
42
  if qa_option == 'gpt3':
43
  qa_model = qa_gpt3
@@ -85,7 +85,9 @@ def get_documents(document_text, crawl=crawl_urls):
85
  if tokenizing == "Don't (use entire body as document)":
86
  document_paragraphs = [body]
87
  elif tokenizing == 'Newline (split by newline character)':
88
- document_paragraphs = [n for n in body.split('\n') if len(n) > 50]
 
 
89
 
90
  for document_paragraph in document_paragraphs:
91
  all_documents[document_paragraph] = url
 
9
 
10
  all_documents = {}
11
 
12
+ def qa_gpt3(query, context):
13
  openai.api_key = st.secrets["openai_key"]
14
 
15
  response = openai.Completion.create(
 
37
  query = st.text_input("Query")
38
 
39
  qa_option = st.selectbox('Q/A Answerer', ('gpt3', 'a-ware/bart-squadv2'))
40
+ tokenizing = st.selectbox('How to Tokenize', ("Don't (use entire body as document)", 'Newline (split by newline character)', 'Combo'))
41
 
42
  if qa_option == 'gpt3':
43
  qa_model = qa_gpt3
 
85
  if tokenizing == "Don't (use entire body as document)":
86
  document_paragraphs = [body]
87
  elif tokenizing == 'Newline (split by newline character)':
88
+ document_paragraphs = [n for n in body.split('\n') if len(n) > 250]
89
+ elif tokenizing == 'Combo':
90
+ document_paragraphs = [body] + [n for n in body.split('\n') if len(n) > 250]
91
 
92
  for document_paragraph in document_paragraphs:
93
  all_documents[document_paragraph] = url