Update functions.py
Browse files
- functions.py +6 -4
functions.py
CHANGED
@@ -122,13 +122,13 @@ def load_asr_model(asr_model_name):
|
|
122 |
return asr_model
|
123 |
|
124 |
@st.experimental_singleton(suppress_st_warning=True)
|
125 |
-
def process_corpus(corpus,
|
126 |
|
127 |
'''Process text for Semantic Search'''
|
128 |
|
129 |
pinecone.init(api_key=OPEN_AI_KEY, environment="us-west1-gcp")
|
130 |
|
131 |
-
tokenizer =
|
132 |
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer,chunk_size=chunk_size,chunk_overlap=overlap,separator='. ')
|
133 |
|
134 |
texts = text_splitter.split_text(corpus)
|
@@ -162,16 +162,18 @@ def gen_embeddings(embedding_model):
|
|
162 |
return embeddings
|
163 |
|
164 |
@st.experimental_memo(suppress_st_warning=True)
|
165 |
-
def embed_text(query,corpus,title,embedding_model,
|
166 |
|
167 |
'''Embed text and generate semantic search scores'''
|
168 |
|
169 |
index_id = "earnings-embeddings"
|
170 |
|
|
|
|
|
171 |
embeddings = gen_embeddings(embedding_model)
|
172 |
|
173 |
title = title[0]
|
174 |
-
docsearch = process_corpus(corpus,
|
175 |
|
176 |
docs = docsearch.similarity_search_with_score(query, k=3, namespace = f'{title}-earnings')
|
177 |
|
|
|
122 |
return asr_model
|
123 |
|
124 |
@st.experimental_singleton(suppress_st_warning=True)
|
125 |
+
def process_corpus(corpus, _tok, title, embeddings, chunk_size=200, overlap=50):
|
126 |
|
127 |
'''Process text for Semantic Search'''
|
128 |
|
129 |
pinecone.init(api_key=OPEN_AI_KEY, environment="us-west1-gcp")
|
130 |
|
131 |
+
tokenizer = _tok
|
132 |
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(tokenizer,chunk_size=chunk_size,chunk_overlap=overlap,separator='. ')
|
133 |
|
134 |
texts = text_splitter.split_text(corpus)
|
|
|
162 |
return embeddings
|
163 |
|
164 |
@st.experimental_memo(suppress_st_warning=True)
|
165 |
+
def embed_text(query,corpus,title,embedding_model,_emb_tok,chain_type='stuff'):
|
166 |
|
167 |
'''Embed text and generate semantic search scores'''
|
168 |
|
169 |
index_id = "earnings-embeddings"
|
170 |
|
171 |
+
|
172 |
+
|
173 |
embeddings = gen_embeddings(embedding_model)
|
174 |
|
175 |
title = title[0]
|
176 |
+
docsearch = process_corpus(corpus,_emb_tok,title, embeddings)
|
177 |
|
178 |
docs = docsearch.similarity_search_with_score(query, k=3, namespace = f'{title}-earnings')
|
179 |
|