Kuldip2411 committed
Commit f8817e5
Parent: 8f4d57a

Upload 5 files

Files changed (5)
  1. app.py +106 -104
  2. app_config.py +13 -19
  3. faiss_index/index.faiss +0 -0
  4. faiss_index/index.pkl +3 -0
  5. requirements.txt +11 -11
app.py CHANGED
@@ -1,104 +1,106 @@
- import streamlit as st
- import random
- from app_config import SYSTEM_PROMPT, NLP_MODEL_NAME, NUMBER_OF_VECTORS_FOR_RAG, NLP_MODEL_TEMPERATURE, NLP_MODEL_MAX_TOKENS, VECTOR_MAX_TOKENS
- from functions import get_vectorstore_with_doc_from_pdf, tiktoken_len, get_vectorstore_with_doc_from_word
- from langchain.memory import ConversationSummaryBufferMemory
- from langchain_core.messages import SystemMessage, HumanMessage, AIMessage
- from langchain.chains.summarize import load_summarize_chain
- from langchain.prompts import PromptTemplate
- from langchain_groq import ChatGroq
- from dotenv import load_dotenv
- from pathlib import Path
- import os
- from streamlit_pdf_viewer import pdf_viewer
- env_path = Path('.') / '.env'
- load_dotenv(dotenv_path=env_path)
-
- def response_generator(prompt: str) -> str:
-     """This function can be used for general question answering related to Tyrex and tyre recycling.
-
-     Args:
-         prompt (string): user query
-
-     Returns:
-         string: answer to the query
-     """
-     try:
-         retriever = st.session_state.retriever
-         docs = retriever.invoke(prompt)
-         my_context = [doc.page_content for doc in docs]
-         my_context = '\n\n'.join(my_context)
-
-         system_message = SystemMessage(content=SYSTEM_PROMPT.format(context=my_context, previous_message_summary=st.session_state.rag_memory.moving_summary_buffer))
-         chat_messages = (system_message + st.session_state.rag_memory.chat_memory.messages + HumanMessage(content=prompt)).messages
-         print("total tokens: ", tiktoken_len(str(chat_messages)))
-         # print("my_context*********", my_context)
-         response = st.session_state.llm.invoke(chat_messages)
-         return response.content
-
-     except Exception as error:
-         print(error)
-         return "Oops! Something went wrong, please try again."
-
-
- st.markdown(
-     """
-     <style>
-     .st-emotion-cache-janbn0 {
-         flex-direction: row-reverse;
-         text-align: right;
-     }
-     </style>
-     """,
-     unsafe_allow_html=True,
- )
-
- # When the user gives input
- with st.sidebar:
-     st.header("Hitachi Support Bot")
-     button = st.toggle("View Doc file.")
-
- if button:
-     pdf_viewer("GPT OUTPUT.pdf")
- else:
-     print("SYSTEM MESSAGE")
-     if "messages" not in st.session_state:
-         st.session_state.messages = [{"role": "system", "content": SYSTEM_PROMPT}]
-
-     print("SYSTEM MODEL")
-     if "llm" not in st.session_state:
-         st.session_state.llm = ChatGroq(temperature=NLP_MODEL_TEMPERATURE, groq_api_key=str(os.getenv('GROQ_API_KEY')), model_name=NLP_MODEL_NAME)
-
-     print("rag")
-     if "rag_memory" not in st.session_state:
-         st.session_state.rag_memory = ConversationSummaryBufferMemory(llm=st.session_state.llm, max_token_limit=5000)
-
-     print("retrieval")
-     if "retriever" not in st.session_state:
-         # vector_store = get_vectorstore_with_doc_from_pdf('GPT OUTPUT.pdf')
-         vector_store = get_vectorstore_with_doc_from_word('GPT OUTPUT.docx')
-         st.session_state.retriever = vector_store.as_retriever(k=NUMBER_OF_VECTORS_FOR_RAG)
-
-     print("container")
-     # Display chat messages from history
-     container = st.container(height=700)
-     for message in st.session_state.messages:
-         if message["role"] != "system":
-             with container.chat_message(message["role"]):
-                 st.write(message["content"])
-
-     if prompt := st.chat_input("Enter your query here... "):
-         with container.chat_message("user"):
-             st.write(prompt)
-         st.session_state.messages.append({"role": "user", "content": prompt})
-
-         with container.chat_message("assistant"):
-             response = response_generator(prompt=prompt)
-             print("******************************************************** Response ********************************************************")
-             print("MY RESPONSE IS:", response)
-             st.write(response)
-
-         print("Response is:", response)
-         st.session_state.rag_memory.save_context({'input': prompt}, {'output': response})
-         st.session_state.messages.append({"role": "assistant", "content": response})
 
 
 
+ import os
+ import base64
+ from io import BytesIO
+ from PIL import Image
+ import streamlit as st
+ from app_config import SYSTEM_PROMPT, MODEL, MAX_TOKENS, TRANSFORMER_MODEL
+ from langchain.memory import ConversationSummaryBufferMemory
+ from langchain_community.embeddings import HuggingFaceEmbeddings
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain_groq import ChatGroq
+ from streamlit_pdf_viewer import pdf_viewer
+ from pydantic import BaseModel
+ from langchain.chains import LLMChain
+ from langchain.prompts import ChatPromptTemplate
+ from langchain_community.vectorstores import FAISS
+ from sentence_transformers import SentenceTransformer
+ from typing import Any
+
+ st.title("Hitachi Support Bot")
+
+ class Element(BaseModel):
+     type: str
+     text: Any
+
+ # llm = ChatGoogleGenerativeAI(
+ #     model=MODEL,
+ #     max_tokens=MAX_TOKENS
+ # )
+ llm = ChatGroq(model=MODEL, api_key='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')
+
+ prompt = ChatPromptTemplate.from_template(SYSTEM_PROMPT)
+ qa_chain = LLMChain(llm=llm, prompt=prompt)
+ embeddings = HuggingFaceEmbeddings(model_name=TRANSFORMER_MODEL)
+ db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
+
+ st.markdown(
+     """
+     <style>
+     .st-emotion-cache-janbn0 {
+         flex-direction: row-reverse;
+         text-align: right;
+     }
+     </style>
+     """,
+     unsafe_allow_html=True,
+ )
+
+ def response_generator(question):
+     relevant_docs = db.similarity_search_with_relevance_scores(question, k=5)
+     context = ""
+     relevant_images = []
+     for d, score in relevant_docs:
+         if score > 0:
+             if d.metadata['type'] == 'text':
+                 context += str(d.metadata['original_content'])
+             elif d.metadata['type'] == 'table':
+                 context += str(d.metadata['original_content'])
+             elif d.metadata['type'] == 'image':
+                 context += d.page_content
+                 relevant_images.append(d.metadata['original_content'])
+     result = qa_chain.run({'context': context, "question": question})
+     return result, relevant_images
+
+ with st.sidebar:
+     st.header("Hitachi Support Bot")
+     button = st.toggle("View Doc file.")
+
+ if button:
+     pdf_viewer("GPT OUTPUT.pdf")
+ else:
+     if "messages" not in st.session_state:
+         st.session_state.messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+
+     if "llm" not in st.session_state:
+         st.session_state.llm = llm
+     if "rag_memory" not in st.session_state:
+         st.session_state.rag_memory = ConversationSummaryBufferMemory(llm=st.session_state.llm, max_token_limit=5000)
+
+     container = st.container(height=700)
+     for message in st.session_state.messages:
+         if message["role"] != "system":
+             if message["role"] == "user":
+                 with container.chat_message(message["role"]):
+                     st.write(message["content"])
+             if message["role"] == "assistant":
+                 with container.chat_message(message["role"]):
+                     st.write(message["content"])
+                     for i in range(len(message["images"])):
+                         st.image(Image.open(BytesIO(base64.b64decode(message["images"][i].encode('utf-8')))))
+
+     if prompt := st.chat_input("Enter your query here... "):
+         with container.chat_message("user"):
+             st.write(prompt)
+         st.session_state.messages.append({"role": "user", "content": prompt})
+         with container.chat_message("assistant"):
+             response, images = response_generator(prompt)
+             st.write(response)
+             for i in range(len(images)):
+                 st.markdown("""---""")
+                 st.image(Image.open(BytesIO(base64.b64decode(images[i].encode('utf-8')))))
+                 st.markdown("""---""")
+
+         st.session_state.rag_memory.save_context({'input': prompt}, {'output': response})
+         st.session_state.messages.append({"role": "assistant", "content": response, 'images': images})
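
Note that the new app.py commits a live Groq API key inline. A minimal sketch of the safer pattern, assuming the GROQ_API_KEY environment variable that the previous revision loaded via python-dotenv:

import os

from langchain_groq import ChatGroq

from app_config import MODEL

# Read the key from the environment (e.g. a .env file or deployment
# secrets) rather than committing it to the repository.
groq_api_key = os.getenv("GROQ_API_KEY")
if groq_api_key is None:
    raise RuntimeError("GROQ_API_KEY is not set")

llm = ChatGroq(model=MODEL, api_key=groq_api_key)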
app_config.py CHANGED
@@ -1,19 +1,13 @@
-
- SYSTEM_PROMPT = """
- 1. You are Support bot for hitachi corporation. You must answer of any user questions using context only.
- 2. if you can't provide the answer of the quetions then only tell them "Thank you for your question! I'm here to help with information related to Hitachi corporation.the answer of this question is not given in this video. If you have any queries about those topics, feel free to ask. For other questions, I recommend reaching out to the appropriate source." nothing else.
- 3. User can also give you some greetings like thank you, welcome, please, sorry etc... so you have to handle it appropriately without giving any unnecessary information which is not wanted by user.
- 4. any information must be answered from provided context only, you must not to answer outside to the context.
-
- context: {context}
- """
-
-
- NLP_MODEL_NAME = "llama3-70b-8192"
- REASONING_MODEL_NAME = "mixtral-8x7b-32768"
- REASONING_MODEL_TEMPERATURE = 0
- NLP_MODEL_TEMPERATURE = 0
- NLP_MODEL_MAX_TOKENS = 5400
- VECTOR_MAX_TOKENS = 6000
- VECTORS_TOKEN_OVERLAP_SIZE = 20
- NUMBER_OF_VECTORS_FOR_RAG = 1

+ SYSTEM_PROMPT = """
+ You are Support bot for hitachi corporation. You must answer of any user questions using context only.
+ You have context. make answer using context only and answer must be concise and also if needed use bullet points and other markups.
+ don't metion figure number, table number,.
+ context: {context}
+ Give the answer of this Question: {question}
+
+ """
+
+ # MODEL = "gemini-1.5-flash"
+ MODEL = "llama-3.1-70b-versatile"
+ MAX_TOKENS = 4000
+ TRANSFORMER_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
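
Since SYSTEM_PROMPT now carries two placeholders, ChatPromptTemplate.from_template infers both as required inputs, which is why app.py must pass context and question together. A quick sanity check, a minimal sketch assuming app_config is importable:

from langchain.prompts import ChatPromptTemplate

from app_config import SYSTEM_PROMPT

prompt = ChatPromptTemplate.from_template(SYSTEM_PROMPT)
# from_template infers one input variable per {placeholder} in the string.
print(sorted(prompt.input_variables))  # ['context', 'question']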
 
 
 
 
 
 
faiss_index/index.faiss ADDED
Binary file (223 kB).
 
faiss_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9714a57f85868cb0d2a61778b84593da45764246cfeb039ec0d933f2984edf7f
+ size 4279867
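
The two faiss_index files are a FAISS vector store serialized by LangChain; app.py reloads them with FAISS.load_local. The builder script is not part of this commit, so the following is only a hypothetical sketch, with the 'type'/'original_content' metadata schema inferred from how response_generator reads it:

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Hypothetical documents: page_content is what gets embedded; the raw
# chunk (or a base64-encoded image) rides along in metadata.
docs = [
    Document(page_content="summary of a text chunk",
             metadata={"type": "text", "original_content": "raw chunk text"}),
    Document(page_content="caption describing an image",
             metadata={"type": "image", "original_content": "<base64 image string>"}),
]

db = FAISS.from_documents(docs, embeddings)
db.save_local("faiss_index")  # writes index.faiss and index.pkl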
requirements.txt CHANGED
@@ -1,11 +1,11 @@
- streamlit
- langchain
- langchain_groq
- python-dotenv
- langchain_community
- langchain_chroma
- tiktoken
- sentence_transformers
- pymupdf
- docx2txt
- streamlit_pdf_viewer

+ langchain==0.2.12
+ langchain_google_genai==1.0.8
+ sentence_transformers==3.0.1
+ streamlit==1.37.0
+ streamlit_pdf_viewer==0.0.14
+ langchain_community==0.2.11
+ torch==2.3.1
+ torchaudio==2.3.1
+ torchvision==0.18.1
+ faiss-cpu==1.8.0
+ langchain_groq