updates

- .gitignore +1 -0
- app.py +5 -3

.gitignore CHANGED
@@ -1,2 +1,3 @@
+.idea/
 .DS_Store
 .streamlit/

app.py CHANGED
@@ -9,7 +9,7 @@ import openai
 
 all_documents = {}
 
-def qa_gpt3(
+def qa_gpt3(query, context):
     openai.api_key = st.secrets["openai_key"]
 
     response = openai.Completion.create(
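
Only the opening lines of qa_gpt3 appear in this hunk. For reference, a minimal sketch of how such a Completion-based answerer could be filled out, assuming the legacy openai<1.0 API that openai.Completion.create implies; the prompt text, model name, sampling parameters, and return shape below are assumptions, not the Space's actual code:

import openai
import streamlit as st

def qa_gpt3(query, context):
    # The first lines match the diff; everything after is a hypothetical sketch.
    openai.api_key = st.secrets["openai_key"]

    response = openai.Completion.create(
        model="text-davinci-003",  # assumed model; the diff does not show one
        prompt=(
            "Answer the question from the context.\n\n"
            f"Context: {context}\n\nQuestion: {query}\nAnswer:"
        ),
        max_tokens=100,  # assumed
        temperature=0,   # assumed
    )
    # Assumed output shape, mirroring a QA pipeline result.
    return {'answer': response['choices'][0]['text'].strip()}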

@@ -37,7 +37,7 @@ value='https://www.databricks.com/blog/2022/11/15/values-define-databricks-cultu
 query = st.text_input("Query")
 
 qa_option = st.selectbox('Q/A Answerer', ('gpt3', 'a-ware/bart-squadv2'))
-tokenizing = st.selectbox('How to Tokenize', ("Don't (use entire body as document)", 'Newline (split by newline character)'))
+tokenizing = st.selectbox('How to Tokenize', ("Don't (use entire body as document)", 'Newline (split by newline character)', 'Combo'))
 
 if qa_option == 'gpt3':
     qa_model = qa_gpt3
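
The diff only shows the gpt3 branch of the answerer dispatch. The other selectbox choice, a-ware/bart-squadv2, is an extractive QA model on the Hugging Face Hub; a hedged sketch of how that branch is typically wired up with transformers (the function and variable names are illustrative, not taken from the Space):

from transformers import pipeline

# Assumption: the non-gpt3 branch wraps a standard question-answering pipeline.
bart_pipeline = pipeline('question-answering', model='a-ware/bart-squadv2')

def qa_bart(query, context):
    # The pipeline returns a dict with 'answer' and 'score' keys.
    return bart_pipeline(question=query, context=context)

qa_model = qa_bart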

@@ -85,7 +85,9 @@ def get_documents(document_text, crawl=crawl_urls):
     if tokenizing == "Don't (use entire body as document)":
         document_paragraphs = [body]
     elif tokenizing == 'Newline (split by newline character)':
-        document_paragraphs = [n for n in body.split('\n') if len(n) >
+        document_paragraphs = [n for n in body.split('\n') if len(n) > 250]
+    elif tokenizing == 'Combo':
+        document_paragraphs = [body] + [n for n in body.split('\n') if len(n) > 250]
 
     for document_paragraph in document_paragraphs:
         all_documents[document_paragraph] = url
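
To see what the new 'Combo' option adds, here is a small standalone illustration of the three tokenizing strategies using the same len(n) > 250 filter as the diff (the sample text is made up):

# Sample body: one paragraph well over 250 characters, plus a short line.
long_para = "Databricks culture values example paragraph text. " * 10
body = long_para + '\n' + 'short line'

# Don't (use entire body as document): a single document.
whole = [body]

# Newline: only paragraphs longer than 250 characters survive the filter.
paragraphs = [n for n in body.split('\n') if len(n) > 250]

# Combo: the whole body plus the long paragraphs, so retrieval can match
# at both granularities.
combo = [body] + [n for n in body.split('\n') if len(n) > 250]

print(len(whole), len(paragraphs), len(combo))  # -> 1 1 2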