"""Streamlit chatbot combining dataset retrieval with a hosted medical LLM.

At import time the script loads ``genetic-Final.csv``, merges its descriptive
columns into one text field per row and embeds that text with a Sentence
Transformer model.  ``main`` renders a Streamlit page that shows the rows
most similar to the user's query and the text generated by the
``m42-health/Llama3-Med42-8B`` model via the Hugging Face Inference API.
"""

# Import required libraries
import json
import os

import pandas as pd
import requests
import streamlit as st
from pyngrok import ngrok
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline

# Hugging Face API token.  ``HF_API_KEY`` is read here; ``generate_response``
# also accepts ``HUGGINGFACEHUB_API_TOKEN`` so either variable works.
api_key = os.getenv("HF_API_KEY")

HF_API_URL = (
    "https://api-inference.huggingface.co/models/m42-health/Llama3-Med42-8B"
)
# Upper bound (seconds) on a single inference request so the UI cannot hang
# forever on an unresponsive endpoint.
HF_REQUEST_TIMEOUT_SECONDS = 60

# Spreadsheet-export artefact columns that carry no information.
UNUSED_COLUMNS = ['Unnamed: 0', 'Unnamed: 11', 'Unnamed: 12', 'Unnamed: 13']

# Columns merged (in this order, space separated) into the searchable text.
DESCRIPTION_COLUMNS = [
    'Symptoms',
    'Severity Level',
    'Risk Assessment',
    'Treatment Options',
    'Suggested Medical Tests',
    'Minimum Values for Medical Tests',
    'Emergency Treatment',
]

# NOTE(review): Streamlit re-executes the script on user interaction, so the
# CSV load, model load and embedding pass below are presumably repeated on
# every rerun -- consider Streamlit's caching decorators. TODO confirm.

# Load the CSV dataset
data = pd.read_csv('genetic-Final.csv')

# Drop unnecessary columns; ``errors="ignore"`` keeps the load working when
# an export does not contain one of the artefact columns.
data = data.drop(columns=UNUSED_COLUMNS, errors="ignore")


def _combine_description_columns(frame, columns):
    """Join *columns* of *frame* row-wise into one space-separated Series.

    Missing values become empty strings and every value is coerced to ``str``
    so that numeric columns do not break the string concatenation.
    """
    combined = frame[columns[0]].fillna('').astype(str)
    for column in columns[1:]:
        combined = combined + " " + frame[column].fillna('').astype(str)
    return combined


# Combine relevant columns into one combined description field
data['combined_description'] = _combine_description_columns(
    data, DESCRIPTION_COLUMNS
)

# Initialize the Sentence Transformer model for embeddings
retriever_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


def generate_embedding(description):
    """Return the embedding of *description* as a list of floats.

    Args:
        description: Text to embed.

    Returns:
        The embedding as a plain list, or ``[]`` when *description* is not a
        string (e.g. ``None``/NaN) or contains only whitespace.  The
        whitespace check matters because a row whose source columns are all
        empty still yields a string of separator spaces.
    """
    if not isinstance(description, str) or not description.strip():
        return []
    # Convert the numpy array to a list so it can live in a DataFrame cell.
    return retriever_model.encode(description).tolist()


# Generate embeddings for the combined description
data['embeddings'] = data['combined_description'].apply(generate_embedding)


def get_relevant_info(query, top_k=3):
    """Return the dataset rows most similar to *query*.

    Args:
        query: Free-text user query.
        top_k: Maximum number of rows to return; values below 1 yield an
            empty result.

    Returns:
        A DataFrame slice of ``data`` ordered from most to least similar.
        Rows without an embedding are never returned.
    """
    # Keep only rows that actually have an embedding; an empty list cannot be
    # compared against the query vector.
    usable = [
        (position, embedding)
        for position, embedding in enumerate(data['embeddings'])
        if len(embedding) > 0
    ]
    if not usable or top_k < 1:
        return data.iloc[[]]

    positions, embeddings = zip(*usable)
    query_embedding = retriever_model.encode(query)
    # One call scores the query against every document: result is 1 x N.
    scores = util.cos_sim(query_embedding, list(embeddings))[0].tolist()

    # Stable sort: equally scored rows keep their dataset order.
    ranked = sorted(
        zip(scores, positions), key=lambda pair: pair[0], reverse=True
    )
    top_positions = [position for _, position in ranked[:top_k]]
    return data.iloc[top_positions]


def generate_response(input_text):
    """Generate text for *input_text* with the Hugging Face Inference API.

    Args:
        input_text: Prompt sent as the ``inputs`` field of the request.

    Returns:
        The ``generated_text`` field of the first result.

    Raises:
        RuntimeError: If no API token is configured, the API reports an
            error, or the response does not have the expected shape.
        requests.RequestException: On network failures, timeouts or
            non-success HTTP status codes.
    """
    token = os.environ.get('HUGGINGFACEHUB_API_TOKEN') or api_key
    if not token:
        raise RuntimeError(
            "No Hugging Face API token found; set HUGGINGFACEHUB_API_TOKEN "
            "or HF_API_KEY."
        )

    headers = {"Authorization": f"Bearer {token}"}
    payload = {"inputs": input_text}
    response = requests.post(
        HF_API_URL,
        headers=headers,
        json=payload,
        timeout=HF_REQUEST_TIMEOUT_SECONDS,
    )

    try:
        result = response.json()
    except ValueError as err:
        # Not JSON: prefer the HTTP error if there is one.
        response.raise_for_status()
        raise RuntimeError(
            "Hugging Face API returned a non-JSON response."
        ) from err

    # Surface an error message carried in the payload instead of failing
    # later with an opaque KeyError on ``result[0]``.
    if isinstance(result, dict) and "error" in result:
        raise RuntimeError(f"Hugging Face API error: {result['error']}")
    response.raise_for_status()

    try:
        return result[0]["generated_text"]
    except (KeyError, IndexError, TypeError) as err:
        raise RuntimeError(
            f"Unexpected Hugging Face API response: {result!r}"
        ) from err


def main():
    """Render the Streamlit UI for the chatbot."""
    st.title("Medical Report and Analysis Chatbot")
    st.sidebar.header("Upload Medical Report or Enter Query")

    # Text input for user queries
    user_query = st.sidebar.text_input("Type your question or query")

    # File uploader for medical report
    uploaded_file = st.sidebar.file_uploader(
        "Upload a medical report (optional)", type=["txt", "pdf", "csv"]
    )

    # Process the query if provided
    if user_query:
        st.write("### Query Response:")

        # Retrieve relevant information from dataset
        relevant_info = get_relevant_info(user_query)
        st.write("#### Relevant Medical Information:")
        for description in relevant_info['combined_description']:
            st.write(f"- {description}")

        # Generate a response from the Llama3-Med42-8B model.  A failed API
        # call is reported in the page rather than crashing the app.
        st.write("#### Model's Response:")
        try:
            response = generate_response(user_query)
        except (requests.RequestException, RuntimeError) as err:
            st.error(f"Could not get a response from the model: {err}")
        else:
            st.write(response)

    # Process the uploaded file (if any)
    if uploaded_file:
        # Display analysis of the uploaded report file
        st.write("### Uploaded Report Analysis:")
        # Placeholder for file processing logic
        report_text = "Extracted report content here"
        st.write(report_text)


# Script entry point.  NOTE: pyngrok is imported above but no tunnel is
# opened in this file; only ``main`` is invoked here.
if __name__ == "__main__":
    main()