File size: 1,559 Bytes
7008b1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da490a8
 
7008b1f
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pickle
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors
import gradio as gr

# Load the sentence embeddings from the file.
# SECURITY: pickle.load can execute arbitrary code while deserializing, so
# 'embeddings.pkl' must only ever come from a trusted source.
with open('embeddings.pkl', 'rb') as f:
    embeddings = pickle.load(f)

# Initialize the Nearest Neighbors model with the cosine metric (the values it
# reports are distances, so smaller means more similar). The neighbor count is
# capped at the number of stored embeddings because kneighbors() raises
# ValueError when asked for more neighbors than there are fitted samples.
nbrs = NearestNeighbors(
    n_neighbors=min(10, len(embeddings)),
    metric='cosine',
).fit(embeddings)

# Load the dataset. semantic_search looks up df['text'] by the positions that
# nbrs returns, so row i is assumed to correspond to embeddings[i].
df = pd.read_csv('quran_hadith.csv')

# Initialize the SentenceTransformer model used to encode incoming queries.
# NOTE(review): presumably the same model that produced embeddings.pkl - confirm.
model = SentenceTransformer('all-MiniLM-L6-v2')

def semantic_search(query, model, embeddings, nbrs, texts=None):
    """Return the stored texts closest to *query* together with their distances.

    Args:
        query: The search string to encode.
        model: Object exposing ``encode(list_of_str)`` that returns one
            embedding per input string.
        embeddings: Unused by this function; kept so existing callers that
            pass it positionally continue to work.
        nbrs: Fitted neighbor index exposing ``kneighbors(batch)`` that
            returns ``(distances, indices)`` for a batch of embeddings.
        texts: Optional positional collection of texts to look results up in.
            Defaults to the module-level ``df['text']`` column.

    Returns:
        A list of ``(text, distance)`` tuples, in the order returned by
        ``nbrs.kneighbors``.
    """
    if texts is None:
        texts = df['text']

    # pandas objects need ``.iloc`` for positional access; plain sequences are
    # indexed directly. Resolving this once also avoids re-evaluating the
    # column lookup for every hit.
    by_position = getattr(texts, 'iloc', texts)

    # Encode the query (the model works on batches, so wrap and unwrap it).
    query_embedding = model.encode([query])[0]

    # Find the k nearest neighbors of the single query embedding.
    distances, indices = nbrs.kneighbors([query_embedding])

    return [
        (by_position[idx], dist)
        for idx, dist in zip(indices[0], distances[0])
    ]

def search_interface(query):
    """Run a semantic search for *query* and format the hits for display.

    Args:
        query: Text entered by the user; may be empty or ``None``.

    Returns:
        The matching sentences joined by blank lines, or an empty string when
        the query is empty or whitespace-only.
    """
    # A blank query carries no meaning to search for; skip the model call
    # instead of returning arbitrary neighbors of the empty string.
    if not query or not query.strip():
        return ''

    similar_sentences = semantic_search(query, model, embeddings, nbrs)

    # Distances are dropped here; only the sentences are shown, separated by
    # double newlines.
    return '\n\n'.join(sentence for sentence, _distance in similar_sentences)

# Lift pandas' column-width limit when displaying values.
pd.set_option('display.max_colwidth', None)

# Build the input and output widgets first, then wire them to the search
# callback.
query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
results_box = gr.Textbox(label="Similar Sentences")

# Create Gradio interface
iface = gr.Interface(fn=search_interface, inputs=query_box, outputs=results_box)

# Launch the interface, requesting a shareable link.
iface.launch(share=True)