Spaces:
Runtime error
Runtime error
File size: 3,791 Bytes
edd3ce2 700408f 7e487e9 edd3ce2 9bdb9a6 45dddff 7e487e9 45dddff 7e487e9 9bdb9a6 edd3ce2 7c29081 7e487e9 edd3ce2 7c29081 7e487e9 7c29081 edd3ce2 7c29081 7e487e9 edd3ce2 7c29081 7e487e9 edd3ce2 f58a83a edd3ce2 fc0b62a edd3ce2 42c8f6d 926ae72 fc0b62a edd3ce2 7c29081 7e487e9 700408f edd3ce2 1b0b902 edd3ce2 1b0b902 e0f616c edd3ce2 8b40668 edd3ce2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import gradio as gr
from sentence_transformers import SentenceTransformer
import pandas as pd
#import pickle
import torch
from pathlib import Path
import time
from datetime import datetime
# Identifiers of the resources loaded at import time.
_MODEL_NAME = 'intfloat/multilingual-e5-large-instruct'
_HADITHS_CSV = 'all_hadiths_clean.csv'
_EMBEDDINGS_FILE = 'encoded_hadiths_multilingual-e5-large-instruct (1).sav'


def _log_startup_step(label):
    """Print ``label`` followed by the current local time (coarse startup timing)."""
    print(label)
    print(datetime.fromtimestamp(time.time()))


# Sentence-embedding model used to encode incoming search queries.
model = SentenceTransformer(_MODEL_NAME)
_log_startup_step("load model")

# Hadith table plus its embeddings; the embeddings are forced onto the CPU
# regardless of the device they were saved from.
# NOTE(review): presumably one embedding row per CSV row, in the same order —
# confirm against the script that produced the .sav file.
# NOTE: torch.load unpickles its input; only ever point it at a trusted file.
hadiths = pd.read_csv(_HADITHS_CSV, delimiter=",")
document_embeddings = torch.load(_EMBEDDINGS_FILE, map_location='cpu')
_log_startup_step("load hadiths")
# (keyword looked for in the source name, slug used in sunnah.com URLs).
# Order matters: the first keyword found wins.
_SOURCE_SLUGS = (
    ("Nasa'i", 'nasai'),
    ("Majah", 'ibnmajah'),
    ("Da'ud", 'abudawud'),
    ("Muslim", 'muslim'),
    ("Bukhari", 'bukhari'),
    ("Tirmidhi", 'tirmidhi'),
)


def categorize_source(source):
    """Map a hadith collection name to its sunnah.com URL slug.

    The match is a case-sensitive substring test, tried in a fixed order;
    the first keyword contained in ``source`` decides the result.

    Args:
        source: Collection name, e.g. a string containing "Bukhari".

    Returns:
        One of 'nasai', 'ibnmajah', 'abudawud', 'muslim', 'bukhari' or
        'tirmidhi'; '' when no keyword matches or when ``source`` is not a
        string (such as the float NaN pandas uses for a blank cell).
    """
    # `keyword in source` raises TypeError for non-strings (NaN, None), so
    # treat those the same as an unrecognised collection.
    if not isinstance(source, str):
        return ''
    for keyword, slug in _SOURCE_SLUGS:
        if keyword in source:
            return slug
    return ''
def find(query):
    """Return the three best-scoring hadiths for ``query`` as linked text.

    The query is wrapped in the instruction prompt, embedded with the
    module-level ``model``, and scored against ``document_embeddings`` with a
    matrix product scaled by 100. The three highest-scoring rows of
    ``hadiths`` are formatted into a single ``text`` column containing an
    HTML anchor to sunnah.com around the English text, followed by the
    source, chapter number and hadith number.

    The module-level ``hadiths`` frame is never modified: scores live in a
    per-call Series and formatting happens on a copy of the selected rows,
    so repeated or overlapping calls cannot see each other's scores.

    Args:
        query: Free-text search query.

    Returns:
        A DataFrame with up to three rows, best match first, holding the
        ``text`` column (all other known columns are dropped).

    Raises:
        ValueError: If the number of scores differs from the number of rows
            in ``hadiths``.
        KeyError: If an expected column is missing from ``hadiths``.
    """
    def log_step(label):
        # Label plus wall-clock timestamp, for coarse per-stage timing.
        print(label)
        print(datetime.fromtimestamp(time.time()))

    # Each query must come with a one-sentence instruction that describes the task
    task = 'Given a web search query, retrieve relevant passages that answer the query'
    queries = [f'Instruct: {task}\nQuery: {query}']

    log_step("start")
    query_embeddings = model.encode(queries, convert_to_tensor=True, normalize_embeddings=True)
    log_step("embed query")

    # NOTE(review): this is a cosine similarity only if the stored document
    # embeddings are normalized as well — confirm.
    scores = (query_embeddings @ document_embeddings.T) * 100
    log_step("consine similarity")

    # One score per hadith, indexed by row position.
    similarity = pd.Series(scores.tolist()[0])
    if len(similarity) != len(hadiths):
        raise ValueError(
            f"Length of scores ({len(similarity)}) does not match "
            f"number of hadiths ({len(hadiths)})"
        )
    # Take the top three directly instead of sorting the whole table.
    top_positions = similarity.nlargest(3).index.tolist()
    results = hadiths.iloc[top_positions].copy()
    log_step("sort hadiths")

    source_slug = results['source'].apply(categorize_source)
    # Cast before stripping so a numerically-parsed column does not break `.str`.
    hadith_no = results['hadith_no'].astype(str).str.strip()
    url = 'https://sunnah.com/' + source_slug.astype(str) + ':' + hadith_no
    citation = (
        results['source']
        + "[chapter " + results['chapter_no'].astype(str)
        + " , hadith " + hadith_no + "]"
    )
    results['text'] = (
        '<a href="' + url + '">' + results['text_en'] + '</a>'
        + ' (' + citation.astype(str) + ')'
    )
    results = results.drop(columns=[
        'id', 'hadith_id', 'chain_indx',
        'source', 'chapter_no', 'hadith_no', 'chapter', 'text_ar', 'text_en',
    ])
    log_step("prepare results")
    return results
# Gradio UI: one text box in, one single-column table out.
demo = gr.Interface(
    fn=find,
    inputs="textbox",
    # The 'text' column is typed as markdown; find() fills it with an HTML
    # anchor, presumably so it renders as a clickable link — verify in the UI.
    outputs=[gr.Dataframe(headers=['text'], datatype=["markdown"], wrap=True)],
    cache_examples="lazy",
    examples=[
        ["law of inheritance in islam"],
        ["tunjukilah jalan yang lurus"],
        ["عائشة"],
    ],
    title="Hadiths Finder",
)

if __name__ == "__main__":
    demo.launch()