"""Gradio demo: semantic search over the texts stored in ``quran_hadith.csv``.

At import time the script loads precomputed sentence embeddings from
``embeddings.pkl``, fits a cosine-metric nearest-neighbour index on them,
loads the text dataset, and launches a Gradio interface that returns the
sentences most similar to a free-text query.
"""
import pickle

import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

# Load the precomputed embeddings.
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only use an embeddings.pkl produced by a trusted source.
with open('embeddings.pkl', 'rb') as f:
    embeddings = pickle.load(f)

# Nearest-neighbour index using cosine distance (1 - cosine similarity).
nbrs = NearestNeighbors(n_neighbors=20, metric='cosine').fit(embeddings)

# Dataset holding the searchable sentences in its 'text' column.
# NOTE(review): row i of the embeddings is assumed to correspond to row i of
# this CSV -- confirm against the script that produced embeddings.pkl.
df = pd.read_csv('quran_hadith.csv')

# Sentence encoder used to embed incoming queries.
# NOTE(review): presumably the same model that produced embeddings.pkl -- confirm.
model = SentenceTransformer('all-MiniLM-L6-v2')


def semantic_search(query, model, embeddings, nbrs, k=10):
    """Return the ``k`` dataset sentences most similar to ``query``.

    Args:
        query: Free-text query string.
        model: Encoder exposing ``encode(list_of_str)``; used to embed the query.
        embeddings: The corpus embeddings. Unused here (the fitted ``nbrs``
            index already contains them); kept for interface compatibility.
        nbrs: Fitted ``NearestNeighbors`` index built with the cosine metric.
        k: Number of results to return. Capped at the number of samples the
            index was fitted on so that small datasets do not raise.

    Returns:
        A list of ``(sentence, similarity)`` tuples, nearest first, where
        ``similarity`` is the cosine similarity expressed as a percentage
        and rounded to two decimal places.
    """
    # Asking for more neighbours than fitted samples makes scikit-learn raise.
    k = min(k, nbrs.n_samples_fit_)
    if k <= 0:
        return []

    query_embedding = model.encode([query])[0]

    # kneighbors returns 2-D arrays with one row per query; we send a single
    # query, so row 0 holds its results. Pass k explicitly, otherwise the
    # index falls back to the n_neighbors it was constructed with.
    distances, indices = nbrs.kneighbors([query_embedding], n_neighbors=k)

    texts = df['text']
    # Cosine distance -> cosine similarity as a percentage. float() converts
    # the NumPy scalar into a plain Python float for clean formatting.
    return [
        (texts.iloc[idx], round((1.0 - float(dist)) * 100, 2))
        for idx, dist in zip(indices[0], distances[0])
    ]


def search_interface(query):
    """Gradio callback: run the search and shape the results for JSON output.

    Args:
        query: Text entered by the user.

    Returns:
        A list of ``{"sentence": ..., "similarity": "NN.NN%"}`` dicts, or an
        empty list when the query is blank.
    """
    # Nothing meaningful to search for; skip the encoder entirely.
    if not query or not query.strip():
        return []

    similar_sentences = semantic_search(query, model, embeddings, nbrs, k=10)
    return [
        {"sentence": sentence, "similarity": f"{similarity:.2f}%"}
        for sentence, similarity in similar_sentences
    ]


# Create the Gradio interface.
iface = gr.Interface(
    fn=search_interface,
    inputs=gr.Textbox(lines=2, placeholder="Enter your query here..."),
    outputs=gr.JSON(label="Similar Sentences"),
)

# Launch the interface (share=True requests a public share link).
iface.launch(share=True)