Jan Mühlnikel
fixed search
e8954be
raw
history blame
934 Bytes
import pickle
import faiss
import streamlit as st
from sentence_transformers import SentenceTransformer
import pandas as pd
def search(query, model, embeddings, filtered_df, top_x: int = 30) -> pd.DataFrame:
    """Return the rows of ``filtered_df`` that are closest to ``query``.

    A fresh flat L2 FAISS index is built on every call from the embedding
    rows selected by ``filtered_df.index``; the query is encoded with
    ``model`` and the nearest neighbours are looked up in that index.

    Args:
        query: Text to search for; passed to ``model.encode`` as a
            one-element list.
        model: Object exposing ``encode(list_of_str)`` whose first result
            supports ``.reshape`` (presumably a ``SentenceTransformer`` --
            confirm against the caller).
        embeddings: 2-D array indexed with ``filtered_df.index``. This
            assumes the frame's index labels are valid row positions in
            ``embeddings`` -- TODO confirm against the caller.
        filtered_df: DataFrame holding the candidate rows.
        top_x: Maximum number of rows to return.

    Returns:
        The matching rows of ``filtered_df`` in the order FAISS reports
        them (nearest first). At most ``min(top_x, len(filtered_df))`` rows;
        an empty frame with the same columns when there are no candidates
        or ``top_x`` is not positive.
    """
    # Never ask FAISS for more neighbours than there are candidates: it pads
    # missing results with -1, which ``iloc`` would silently interpret as
    # "last row" and return as a bogus hit.
    k = min(top_x, len(filtered_df))
    if k <= 0:
        return filtered_df.iloc[0:0]

    candidate_labels = filtered_df.index
    filtered_embeddings = embeddings[candidate_labels]

    # Build an exact (brute-force) L2 index over the candidate embeddings.
    # NOTE(review): FAISS expects float32 input -- confirm the dtype of
    # ``embeddings`` and of the model output.
    dimension = filtered_embeddings.shape[1]
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(filtered_embeddings)

    # Encode the query and shape it as a single-row batch for FAISS.
    query_embedding = model.encode([query])[0].reshape(1, -1)

    # Distances are not needed by callers; only the neighbour positions are.
    _, neighbour_positions = faiss_index.search(query_embedding, k=k)

    # Positions refer to rows of ``filtered_embeddings`` and therefore to
    # positional rows of ``filtered_df``. Drop any -1 padding defensively.
    top_positions = [int(pos) for pos in neighbour_positions[0] if pos >= 0]
    return filtered_df.iloc[top_positions]