update: Refactor model-type references to use 'text-embedding-3-small' in the embedding-generation and search functions
Browse files
app.py
CHANGED
@@ -116,10 +116,10 @@ def generate_embedding(model, text, model_type="all-mpnet-base-v2"):
|
|
116 |
convert_to_tensor = True
|
117 |
)
|
118 |
return np.array(t.Tensor.cpu(chunk_embedding))
|
119 |
-
elif model_type == "
|
120 |
response = model.embeddings.create(
|
121 |
input=text,
|
122 |
-
model=
|
123 |
)
|
124 |
return response.data[0].embedding
|
125 |
|
@@ -127,7 +127,7 @@ def search_query(model, query, df, model_type, n=3):
|
|
127 |
if model_type == "all-mpnet-base-v2":
|
128 |
embedding = generate_embedding(model, query, model_type=model_type)
|
129 |
df['similarities'] = df.all_mpnet_embedding.apply(lambda x: cosine_similarity(x, embedding))
|
130 |
-
elif model_type == "
|
131 |
embedding = generate_embedding(model, query, model_type=model_type)
|
132 |
df['similarities'] = df.openai_embedding.apply(lambda x: cosine_similarity(x, embedding))
|
133 |
res = df.sort_values('similarities', ascending=False).head(n)
|
@@ -220,7 +220,7 @@ def main():
|
|
220 |
st.session_state.top_results_mpnet = res_mpnet.index.tolist()
|
221 |
|
222 |
# OpenAI search
|
223 |
-
res_openai = search_query(client, query, df, "
|
224 |
st.session_state.top_results_openai = res_openai.index.tolist()
|
225 |
|
226 |
end_time = timer()
|
@@ -237,7 +237,7 @@ def main():
|
|
237 |
"text": df.iloc[int(st.session_state.top_results_mpnet[0])]["ext"]
|
238 |
},
|
239 |
{
|
240 |
-
"model": "
|
241 |
"text": df.iloc[int(st.session_state.top_results_openai[0])]["ext"]
|
242 |
}
|
243 |
]
|
|
|
116 |
convert_to_tensor = True
|
117 |
)
|
118 |
return np.array(t.Tensor.cpu(chunk_embedding))
|
119 |
+
elif model_type == "text-embedding-3-small":
|
120 |
response = model.embeddings.create(
|
121 |
input=text,
|
122 |
+
model=model_type
|
123 |
)
|
124 |
return response.data[0].embedding
|
125 |
|
|
|
127 |
if model_type == "all-mpnet-base-v2":
|
128 |
embedding = generate_embedding(model, query, model_type=model_type)
|
129 |
df['similarities'] = df.all_mpnet_embedding.apply(lambda x: cosine_similarity(x, embedding))
|
130 |
+
elif model_type == "text-embedding-3-small":
|
131 |
embedding = generate_embedding(model, query, model_type=model_type)
|
132 |
df['similarities'] = df.openai_embedding.apply(lambda x: cosine_similarity(x, embedding))
|
133 |
res = df.sort_values('similarities', ascending=False).head(n)
|
|
|
220 |
st.session_state.top_results_mpnet = res_mpnet.index.tolist()
|
221 |
|
222 |
# OpenAI search
|
223 |
+
res_openai = search_query(client, query, df, "text-embedding-3-small", n=1)
|
224 |
st.session_state.top_results_openai = res_openai.index.tolist()
|
225 |
|
226 |
end_time = timer()
|
|
|
237 |
"text": df.iloc[int(st.session_state.top_results_mpnet[0])]["ext"]
|
238 |
},
|
239 |
{
|
240 |
+
"model": "text-embedding-3-small",
|
241 |
"text": df.iloc[int(st.session_state.top_results_openai[0])]["ext"]
|
242 |
}
|
243 |
]
|