# Load the packages and configurations import os import pandas as pd from retriever import BM25Plus import streamlit as st from groq import Groq # Configure GROQ API_KEY client = Groq( api_key=os.environ.get("GROQ_API_KEY"), ) # RTL formatting of the streamlit st.markdown(""" """, unsafe_allow_html=True) # Streamlit details st.title("DarooGAP") st.sidebar.title("💊 داروگپ 💊") st.sidebar.divider() # Load the dataset df = pd.read_csv('darooyab_qa.csv') corpus = df.loc[:, 'Corpus'].to_list() # Setup the BM25 retriever def get_prompt(query, query_bm25): prompt = 'به سوال کاربر بر اساس متن هایی که در ادامه آمده است پاسخ بدهید' + '\n' prompt += 'اگر قادر به جواب دادن به سوال نبودی، عبارت زیر را خروجی بده:' + '\n' prompt += 'متاسفم در حال حاضر اطلاعات زیادی درباره سوال شما نمی دانم!' + '\n\n' prompt += f'سوال:' + '\n' + query + '\n\n' for idx, topic in enumerate(query_bm25): prompt += f'متن {idx+1}: ' + topic + '\n' return prompt def get_relevant_topics(query, corpus, n=3): # Build the best match 25 base tokenized_corpus = [doc.split(" ") for doc in corpus] bm25 = BM25Plus(tokenized_corpus) tokenized_query = query.split(" ") query_bm = bm25.get_top_n(tokenized_query, corpus, n=n) return query_bm # Chatbot formatting if "messages" not in st.session_state: st.session_state["messages"] = [{"role": "assistant", "content": "چطور می‌توانم به شما کمک کنم؟"}] if "drugs" not in st.session_state: st.session_state["drugs"] = {"drug_name": [], "drug_link": []} for msg in st.session_state.messages: st.chat_message(msg["role"]).write(msg["content"]) if prompt := st.chat_input(): st.session_state.messages.append({"role": "user", "content": prompt}) st.chat_message("user").write(prompt) topics = get_relevant_topics(prompt, corpus, n=5) relevant_drugs = df.iloc[list(topics.values())] st.sidebar.header('داروها') for drug in relevant_drugs.iterrows(): drug_name = drug[1]['Farsi_generic_name'] drug_link = drug[1]['Link'] if drug_name not in st.session_state.drugs['drug_name']: st.session_state.drugs['drug_name'].append(drug_name) st.session_state.drugs['drug_link'].append(drug_link) for i in range(len(st.session_state.drugs['drug_name'])): if st.session_state.drugs['drug_name'][i] in prompt: st.sidebar.link_button(st.session_state.drugs['drug_name'][i], st.session_state.drugs['drug_link'][i]) prompt = get_prompt(prompt, topics) response = client.chat.completions.create( messages=[ { "role": "system", "content": "تو یک دستیار سودمند هستی." }, { "role": "user", "content": prompt, } ], # The language model which will generate the completion. model="llama3-70b-8192", ) msg = response.choices[0].message.content st.session_state.messages.append({"role": "assistant", "content": msg}) st.chat_message("assistant").write(msg)