"""Gradio demo: retrieval over the text of the Qur'an using a ColBERT index.

At import time the script tries to load a RAGatouille index from
``path_to_index`` and builds a single-textbox Gradio interface (``qapp``)
whose handler returns the top ``k`` retrieved passages as plain text.
Running the file as a script launches the interface.
"""
import os

import gradio as gr
from ragatouille import RAGPretrainedModel

path_to_index = 'colbert/indexes/ArColbertQuran'

# RAG stays None when the index directory is missing, so answer_fn can report
# the problem to the user instead of failing with a NameError on first query.
RAG = None
message = "waiting to load index ..."
if os.path.exists(path_to_index):
    RAG = RAGPretrainedModel.from_index(path_to_index)
    message = "index loaded!"
else:
    message = f"index not found at '{path_to_index}'"
print(message)


def process_results(results):
    """Format retrieved passages as one display string.

    Args:
        results: Iterable of mappings, each providing the keys
            ``'document_id'``, ``'document_metadata'`` and ``'content'``.

    Returns:
        One "Sura: ... / Text: ..." entry per result, each followed by a
        blank line; the empty string when ``results`` is empty.

    Raises:
        KeyError: If a result lacks one of the three keys above.
    """
    return "".join(
        f"Sura: {r['document_id']} ({r['document_metadata']}) \n Text:{r['content']}\n\n"
        for r in results
    )


k = 3  # How many documents you want to retrieve


def answer_fn(query):
    """Search the index for ``query`` and return the formatted top-``k`` hits.

    Args:
        query: Free-text query typed into the Gradio textbox.

    Returns:
        The formatted results, or a short explanatory message when the index
        was not loaded or the query is empty / whitespace-only.
    """
    if RAG is None:
        return f"Index not found at '{path_to_index}'; search is unavailable."
    if not query or not query.strip():
        return "Please enter a query."
    results = RAG.search(query=query, k=k)
    return process_results(results)


qapp = gr.Interface(
    fn=answer_fn,
    inputs="textbox",
    outputs="textbox",
    examples=[
        "ما أهمية كتابة المعاملات؟",
        "أخبرني عن عذاب الله للمنافقين",
        "حسن معاملة الوالدين",
        "ما معجزات سيدنا عيسى",
        "ما هو التطفيف",
        "ما قصة المؤمنين الذين قتلوا في الحفرة؟",
    ],
    title="Qur'an Retrieval Demo - Semantic Search",
    # Adjacent literals are concatenated by the parser; the explicit trailing
    # spaces keep the sentences separated (the former in-string backslash
    # continuations either leaked a backslash or dropped the space).
    description=(
        "A basic demo based on Arabic ColBERT (100k triplets) and simple text of the Qur'an. "
        "First query may take a minute, then much faster. "
        "Try to include relevant terms - this is just retrieval, not LLM chat and Qur'an is an edge case. "
        "For details, see: https://www.linkedin.com/posts/akhooli_arabic-1-million-curated-triplets-dataset-activity-7222951839774699521-PZcw"
    ),
)

if __name__ == "__main__":
    qapp.launch()