mhdhrubo committed on
Commit
7c5cc2a
1 Parent(s): 8b17762

adding files

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. .gitignore +8 -0
  3. app.py +49 -0
  4. embeddings.pkl +3 -0
  5. quran_hadith.csv +3 -0
  6. requirements.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ quran_hadith.csv filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ venv/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ *.pyc
7
+ *.pyo
8
+ *.pyd
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import pandas as pd
3
+ from sentence_transformers import SentenceTransformer
4
+ from sklearn.neighbors import NearestNeighbors
5
+ import gradio as gr
6
+
7
# Unpickle the precomputed embeddings that the neighbour index is built from.
# NOTE(review): pickle.load executes arbitrary code from the file; only ship
# an embeddings.pkl that was produced by a trusted process.
with open('embeddings.pkl', 'rb') as pickle_file:
    embeddings = pickle.load(pickle_file)

# Build the cosine-distance neighbour index; n_neighbors=20 is the default
# result count used when a query does not ask for a specific number.
nbrs = NearestNeighbors(n_neighbors=20, metric='cosine')
nbrs.fit(embeddings)

# Source table; its 'text' column is what search results are read from.
df = pd.read_csv('quran_hadith.csv')

# Sentence encoder used to embed incoming queries.
# presumably the same model that produced embeddings.pkl; verify
model = SentenceTransformer('all-MiniLM-L6-v2')
19
+
20
def semantic_search(query, model, embeddings, nbrs, k=10, texts=None):
    """Return the ``k`` corpus sentences closest to ``query``.

    Args:
        query: Free-text search string.
        model: Encoder exposing ``encode(list_of_str)`` and returning one
            embedding per input string.
        embeddings: Not used by this function; kept in the signature so
            existing callers keep working.
        nbrs: Fitted neighbour index exposing
            ``kneighbors(X, n_neighbors=...)`` that returns
            ``(distances, indices)`` with one row per query.
        k: Number of results to return. Values below 1 yield an empty list.
        texts: Optional sequence of corpus sentences addressed by position.
            Defaults to the module-level ``df['text']`` column.

    Returns:
        A list of ``(sentence, similarity)`` tuples in the order produced by
        ``nbrs.kneighbors``. ``similarity`` is ``(1 - distance) * 100``
        rounded to two decimals, i.e. a percentage when ``nbrs`` uses the
        cosine metric as configured in this file.
    """
    if k < 1:
        return []

    if texts is None:
        texts = df['text']
    # pandas objects need ``.iloc`` for positional lookup; plain sequences
    # (lists, tuples) are indexed directly.
    by_position = getattr(texts, 'iloc', texts)

    # Encode the query as a single-item batch and take its only embedding.
    query_embedding = model.encode([query])[0]

    # Ask for exactly k neighbours instead of the index's fitted default.
    distances, indices = nbrs.kneighbors([query_embedding], n_neighbors=k)

    # Both results are 2-D with one row per query, so row 0 of *each* must be
    # paired; zipping against the whole ``distances`` array would produce a
    # single tuple carrying the full distance row.
    return [
        (by_position[idx], round(float((1 - dist) * 100), 2))
        for idx, dist in zip(indices[0], distances[0])
    ]
34
+
35
+ # Gradio function
36
def search_interface(query):
    """Gradio callback: search for ``query`` and shape the hits for JSON output.

    Calls ``semantic_search`` with the module-level ``model``, ``embeddings``
    and ``nbrs`` (requesting ``k=10``) and returns a list of dicts with the
    keys ``"sentence"`` and ``"similarity"``; the latter is the score returned
    by ``semantic_search`` formatted as text with a trailing ``%``.
    """
    hits = semantic_search(query, model, embeddings, nbrs, k=10)
    payload = []
    for text, score in hits:
        payload.append({"sentence": text, "similarity": f"{score}%"})
    return payload
40
+
41
# Build the UI pieces first: a two-line query box and a JSON results panel.
_query_box = gr.Textbox(lines=2, placeholder="Enter your query here...")
_results_view = gr.JSON(label="Similar Sentences")

# Wire the components to the search callback.
iface = gr.Interface(fn=search_interface, inputs=_query_box, outputs=_results_view)

# Start serving the app; share=True is passed to request a shareable link.
iface.launch(share=True)
embeddings.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bea0ad0ae5e5cf9a73dad7706c32f651e6596cd5b025a5abd440ca5bde7e006a
3
+ size 40502947
quran_hadith.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6beddedddf73d2e8327e284a0eeed55820a246e6f99e19445c812027b5744cc5
3
+ size 10748559
requirements.txt ADDED
Binary file (3.59 kB). View file