File size: 3,791 Bytes
edd3ce2
 
 
 
 
 
700408f
7e487e9
edd3ce2
9bdb9a6
45dddff
7e487e9
45dddff
 
 
 
 
 
7e487e9
9bdb9a6
edd3ce2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c29081
7e487e9
edd3ce2
 
7c29081
7e487e9
7c29081
edd3ce2
7c29081
7e487e9
edd3ce2
 
 
 
7c29081
7e487e9
edd3ce2
f58a83a
edd3ce2
 
 
 
fc0b62a
edd3ce2
42c8f6d
926ae72
fc0b62a
edd3ce2
7c29081
7e487e9
700408f
edd3ce2
1b0b902
 
 
 
edd3ce2
 
 
 
1b0b902
 
e0f616c
edd3ce2
 
 
8b40668
edd3ce2
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import gradio as gr
from sentence_transformers import SentenceTransformer
import pandas as pd
#import pickle
import torch
from pathlib import Path  
import time
from datetime import datetime

# Embedding model shared by queries and the pre-computed corpus embeddings.
# Loaded once at import time; startup blocks until the download/load finishes.
model = SentenceTransformer('intfloat/multilingual-e5-large-instruct')
print("load model")
print(datetime.fromtimestamp(time.time()))

# Hadith rows (text + metadata) and their matching pre-computed embeddings.
# Row order in the CSV must line up with the embedding rows in the .sav file.
# NOTE(review): torch.load unpickles arbitrary objects — acceptable only
# because the .sav ships with the app; never point this at untrusted files.
hadiths = pd.read_csv('all_hadiths_clean.csv', delimiter=",")
document_embeddings = torch.load('encoded_hadiths_multilingual-e5-large-instruct (1).sav',map_location ='cpu')
#file = open('encoded_hadiths_multilingual-e5-large-instruct (1).sav','rb')
#document_embeddings = pickle.load(file)
print("load hadiths")
print(datetime.fromtimestamp(time.time()))

def categorize_source(source):
    """Map a hadith collection name to its sunnah.com URL slug.

    Scans *source* for a known collection keyword and returns the matching
    slug, or '' when no keyword is found.
    """
    keyword_to_slug = (
        ("Nasa'i", 'nasai'),
        ("Majah", 'ibnmajah'),
        ("Da'ud", 'abudawud'),
        ("Muslim", 'muslim'),
        ("Bukhari", 'bukhari'),
        ("Tirmidhi", 'tirmidhi'),
    )
    for keyword, slug in keyword_to_slug:
        if keyword in source:
            return slug
    return ''

def find(query):
    """Return the top-3 hadiths most similar to *query*.

    Embeds the query with the E5-instruct model, scores it against the
    pre-computed corpus embeddings by cosine similarity, and returns a
    one-column DataFrame ('text') of HTML links to sunnah.com with the
    source reference appended.
    """
    def get_detailed_instruct(task_description: str, query: str) -> str:
        # E5-instruct models expect queries prefixed with a task instruction.
        return f'Instruct: {task_description}\nQuery: {query}'

    # Each query must come with a one-sentence instruction that describes the task
    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [
        get_detailed_instruct(task, query)
    ]
    print("start")
    print(datetime.fromtimestamp(time.time()))

    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    print("embed query")
    print(datetime.fromtimestamp(time.time()))

    # Embeddings are normalized, so the dot product is cosine similarity
    # (scaled by 100 for readability).
    scores = (query_embeddings @ document_embeddings.T) * 100
    print("cosine similarity")
    print(datetime.fromtimestamp(time.time()))

    # Score into a copy: mutating the shared module-level `hadiths` frame is
    # unsafe when Gradio serves concurrent requests.
    scored = hadiths.copy()
    scored['similarity'] = scores.tolist()[0]
    sorted_hadiths = scored.sort_values(by='similarity', ascending=False)
    print("sort hadiths")
    print(datetime.fromtimestamp(time.time()))

    results = sorted_hadiths.head(3).drop(columns=['id', 'hadith_id', 'chain_indx'])
    results['source_cat'] = results['source'].apply(categorize_source)
    results['hadith_no'] = results['hadith_no'].str.strip()

    # Build sunnah.com deep links, e.g. https://sunnah.com/bukhari:1
    url = 'https://sunnah.com/'+results['source_cat'].astype(str)+':'+results['hadith_no'].astype(str)
    results = results.drop(columns=['source_cat'])
    results['source'] = results['source'] + "[chapter " + results['chapter_no'].astype(str) + " , hadith " + results['hadith_no'].astype(str) + "]"
    results['text'] = '<a href="'+url+'">'+results['text_en']+ '</a>' + ' (' + results['source'].astype(str) + ')'
    results = results.drop(columns=['source', 'chapter_no', 'hadith_no', 'chapter', 'similarity', 'text_ar', 'text_en'])

    print("prepare results")
    print(datetime.fromtimestamp(time.time()))

    return results
    
# Gradio UI: a single textbox feeds find(); results render in a
# markdown-capable dataframe so the generated <a href> links are clickable.
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    outputs=[gr.Dataframe(headers=['text'], datatype=["markdown"], wrap=True)],
    examples=[
        ["law of inheritance in islam"],
        ["tunjukilah jalan yang lurus"],
        ["عائشة"],
    ],
    cache_examples="lazy",
    title="Hadiths Finder",
)

if __name__ == "__main__":
    demo.launch()