eli02 committed on
Commit
b37e77c
·
1 Parent(s): bce0331

update: Refactor model type references to use 'text-embedding-3-small' in embedding generation and search functions

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -116,10 +116,10 @@ def generate_embedding(model, text, model_type="all-mpnet-base-v2"):
116
  convert_to_tensor = True
117
  )
118
  return np.array(t.Tensor.cpu(chunk_embedding))
119
- elif model_type == "openai":
120
  response = model.embeddings.create(
121
  input=text,
122
- model="text-embedding-3-small"
123
  )
124
  return response.data[0].embedding
125
 
@@ -127,7 +127,7 @@ def search_query(model, query, df, model_type, n=3):
127
  if model_type == "all-mpnet-base-v2":
128
  embedding = generate_embedding(model, query, model_type=model_type)
129
  df['similarities'] = df.all_mpnet_embedding.apply(lambda x: cosine_similarity(x, embedding))
130
- elif model_type == "openai":
131
  embedding = generate_embedding(model, query, model_type=model_type)
132
  df['similarities'] = df.openai_embedding.apply(lambda x: cosine_similarity(x, embedding))
133
  res = df.sort_values('similarities', ascending=False).head(n)
@@ -220,7 +220,7 @@ def main():
220
  st.session_state.top_results_mpnet = res_mpnet.index.tolist()
221
 
222
  # OpenAI search
223
- res_openai = search_query(client, query, df, "openai", n=1)
224
  st.session_state.top_results_openai = res_openai.index.tolist()
225
 
226
  end_time = timer()
@@ -237,7 +237,7 @@ def main():
237
  "text": df.iloc[int(st.session_state.top_results_mpnet[0])]["ext"]
238
  },
239
  {
240
- "model": "openai",
241
  "text": df.iloc[int(st.session_state.top_results_openai[0])]["ext"]
242
  }
243
  ]
 
116
  convert_to_tensor = True
117
  )
118
  return np.array(t.Tensor.cpu(chunk_embedding))
119
+ elif model_type == "text-embedding-3-small":
120
  response = model.embeddings.create(
121
  input=text,
122
+ model=model_type
123
  )
124
  return response.data[0].embedding
125
 
 
127
  if model_type == "all-mpnet-base-v2":
128
  embedding = generate_embedding(model, query, model_type=model_type)
129
  df['similarities'] = df.all_mpnet_embedding.apply(lambda x: cosine_similarity(x, embedding))
130
+ elif model_type == "text-embedding-3-small":
131
  embedding = generate_embedding(model, query, model_type=model_type)
132
  df['similarities'] = df.openai_embedding.apply(lambda x: cosine_similarity(x, embedding))
133
  res = df.sort_values('similarities', ascending=False).head(n)
 
220
  st.session_state.top_results_mpnet = res_mpnet.index.tolist()
221
 
222
  # OpenAI search
223
+ res_openai = search_query(client, query, df, "text-embedding-3-small", n=1)
224
  st.session_state.top_results_openai = res_openai.index.tolist()
225
 
226
  end_time = timer()
 
237
  "text": df.iloc[int(st.session_state.top_results_mpnet[0])]["ext"]
238
  },
239
  {
240
+ "model": "text-embedding-3-small",
241
  "text": df.iloc[int(st.session_state.top_results_openai[0])]["ext"]
242
  }
243
  ]