File size: 5,199 Bytes
d3110a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79a8f20
d3110a2
 
 
 
 
 
 
 
 
79a8f20
d3110a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from langchain_core.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.llms.ctransformers import CTransformers
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain_community.llms import HuggingFaceHub

from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import PyPDFDirectoryLoader

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

from langchain_community.embeddings import HuggingFaceBgeEmbeddings

from langchain.prompts import PromptTemplate

from langchain.chains import create_retrieval_chain
from langchain.chains import RetrievalQA

from langchain.chains.combine_documents import create_stuff_documents_chain

import os 
import streamlit as st
import fitz  # PyMuPDF
from PIL import Image
import io

# Directory holding the saved FAISS index, and the PDF used for page previews.
DB_FAISS_PATH = 'vectorstores/'
pdf_path = 'Oxford/Oxford-psychiatric-handbook-1-760.pdf'

# Earlier prompt wording, kept for reference only (not used at runtime):
#   Use the following pieces of information to answer the user's questions.
#   If you don't know the answer, just say that you don't know; don't try to
#   make up an answer.
#   Context : {context}
#   Question : {question}
#   Only return the helpful answer below and nothing else.

# Active prompt text. It is bound to two module-level names; both refer to the
# same string object.
prompt_template = """
Use the following piece of context to answer the question asked.
Please try to provide the answer only based on the context
{context}
Question:{question}
 """
custom_prompt_template = prompt_template
def set_custom_prompt():
    """Build the prompt object used for QA retrieval over the vector store.

    Returns:
        A ``PromptTemplate`` wrapping ``custom_prompt_template`` and declaring
        the ``context`` and ``question`` input variables.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )
    
# Re-export the Hugging Face token into the process environment only when it
# is actually set. os.environ accepts str values only, so the previous
# unconditional assignment raised TypeError at import time whenever the
# variable was missing, before any page content could be rendered.
_hf_api_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
if _hf_api_token is not None:
    os.environ['HUGGINGFACEHUB_API_TOKEN'] = _hf_api_token
def load_llm():
    """Create the Hugging Face Hub LLM client used to answer questions.

    Returns:
        A ``HuggingFaceHub`` instance configured for the
        "mistralai/Mistral-7B-v0.1" repository, with the API token read from
        the ``HUGGINGFACEHUB_API_TOKEN`` environment variable.
    """
    # Earlier local-model alternative, kept for reference only:
    # llm = CTransformers(
    #     model = 'TheBloke/Llama-2-7B-Chat-GGML',
    #     model_type = 'llama',
    #     max_new_token = 512,
    #     temperature = 0.5
    # )
    generation_settings = {'temperature': 0.1, "max_length": 500}
    return HuggingFaceHub(
        repo_id="mistralai/Mistral-7B-v0.1",
        huggingfacehub_api_token=os.getenv('HUGGINGFACEHUB_API_TOKEN'),
        model_kwargs=generation_settings,
    )

def retrieval_qa_chain(llm, prompt, db):
    """Wire an LLM, a prompt and a vector store into a RetrievalQA chain.

    Args:
        llm: Language model handed to ``RetrievalQA.from_chain_type``.
        prompt: Prompt passed to the chain via ``chain_type_kwargs``.
        db: Vector store; its ``as_retriever`` method supplies the retriever.

    Returns:
        The chain built by ``RetrievalQA.from_chain_type`` with the 'stuff'
        chain type and ``return_source_documents`` enabled.
    """
    # Similarity search limited to the three closest chunks.
    top_k_retriever = db.as_retriever(
        search_type='similarity',
        search_kwargs={'k': 3},
    )
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=top_k_retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )

def qa_bot():
    """Assemble the retrieval QA chain from the saved FAISS index.

    Creates the BGE embedding model on CPU, loads the FAISS index stored at
    ``DB_FAISS_PATH`` with it, and combines the index with the LLM and the
    custom prompt.

    Returns:
        The chain returned by ``retrieval_qa_chain``.
    """
    # NOTE(review): final_result calls this for every query, so the embedding
    # model and the index are rebuilt each time; caching may be worthwhile.
    embedding_model = HuggingFaceBgeEmbeddings(
        model_name='BAAI/bge-small-en-v1.5',  # was 'sentence-transformers/all-MiniLM-L6-v2'
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
    )

    # NOTE(review): allow_dangerous_deserialization=True presumably enables
    # pickle-based loading; only point DB_FAISS_PATH at a trusted index.
    vector_store = FAISS.load_local(
        DB_FAISS_PATH,
        embedding_model,
        allow_dangerous_deserialization=True,
    )

    return retrieval_qa_chain(load_llm(), set_custom_prompt(), vector_store)

def final_result(query):
    """Answer a user question with the retrieval QA chain.

    Args:
        query: Question text; passed to the chain under the 'query' key.

    Returns:
        The chain's response mapping. The caller in this file reads its
        'result' and 'source_documents' entries.
    """
    qa_chain = qa_bot()
    # Use invoke() instead of calling the chain object directly: calling a
    # chain via __call__ is deprecated in LangChain, while invoke() is the
    # supported entry point and yields the same response mapping.
    return qa_chain.invoke({'query': query})

def get_pdf_page_as_image(pdf_path, page_number):
    """Render a single page of a PDF as a PIL image.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        page_number: Page index handed directly to ``Document.load_page``
            (the caller in this file passes the page value from the source
            document's metadata and displays it as ``page + 1``).

    Returns:
        A PIL ``Image`` decoded from the rendered page's pixmap bytes.
    """
    # The context manager closes the document even if rendering raises;
    # previously the handle stayed open after every call.
    with fitz.open(pdf_path) as document:
        page = document.load_page(page_number)
        # Serialize the pixmap while the document is still open; the resulting
        # bytes no longer depend on the document.
        image_bytes = page.get_pixmap().tobytes()
    return Image.open(io.BytesIO(image_bytes))

# Page heading
st.title('Medical Chatbot')

# Free-text question box
user_query = st.text_input("Please enter your question:")

# Everything below runs only when the button reports a click.
if st.button('Get Answer'):
    if not user_query:
        st.write("Please enter a question to get an answer.")
    else:
        # Run the retrieval QA chain on the question
        response = final_result(user_query)
        if not response:
            st.write("Sorry, I couldn't find an answer to your question.")
        else:
            # Generated answer
            st.write("### Answer")
            st.write(response['result'])

            # Supporting passages, when the chain returned them
            if 'source_documents' in response:
                st.write("### Source Document Information")
                for doc in response['source_documents']:
                    # Turn literal backslash-n sequences in the text into real line breaks
                    readable_text = doc.page_content.replace("\\n", "\n")
                    st.write("#### Document Content")
                    st.text_area(label="Page Content", value=readable_text, height=300)

                    # Where the passage came from
                    source = doc.metadata['source']
                    page = doc.metadata['page']
                    st.write(f"Source: {source}")
                    st.write(f"Page Number: {page+1}")

                    # The preview is always rendered from the module-level pdf_path,
                    # not from the passage's own source path.
                    page_preview = get_pdf_page_as_image(pdf_path, page)
                    st.image(page_preview, caption=f"Page {page+1} from {source}")