"""Streamlit chat app that answers questions about uploaded PDF/DOCX files.

Uploaded documents are split into chunks, embedded into a FAISS vector
store, and queried through a LangChain conversational retrieval chain.
"""

import os

import requests
import streamlit as st
from docx import Document
from dotenv import load_dotenv
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
from requests.auth import HTTPBasicAuth
from streamlit_chat import message

from htmlTemplates import css, bot_template, user_template

# Substrings of an answer that mark it as "the bot could not answer".
_UNANSWERED_MARKERS = ("don't have", "don't know")

# Fallback Jira description used when the caller supplies an empty one
# (a Jira document text node must not be empty).
_DEFAULT_JIRA_DESCRIPTION = (
    "Creating of an issue using project keys and issue type names "
    "using the REST API"
)

# Seconds to wait for the Jira REST API before giving up.
_JIRA_TIMEOUT_SECONDS = 30


def get_uploaded_text(uploadedFiles):
    """Extract the plain text of every uploaded PDF and DOCX file.

    Args:
        uploadedFiles: Iterable of uploaded file objects exposing ``.name``
            and a file-like interface. ``None`` is treated as empty.

    Returns:
        The text of all PDF pages and DOCX paragraphs, joined with newlines
        so the newline-based splitter has boundaries to split on. Files with
        any other extension are ignored.
    """
    parts = []
    for uploadedFile in uploadedFiles or []:
        # Compare case-insensitively so "REPORT.PDF" is not silently skipped.
        file_extension = os.path.splitext(uploadedFile.name)[1].lower()
        if file_extension == '.pdf':
            pdf_reader = PdfReader(uploadedFile)
            # extract_text() may yield None for pages without a text layer.
            parts.extend(page.extract_text() or "" for page in pdf_reader.pages)
        elif file_extension == '.docx':
            doc = Document(uploadedFile)
            parts.extend(para.text for para in doc.paragraphs)
    return "\n".join(parts)


def get_text_chunks(text):
    """Split ``text`` on newlines into overlapping chunks for embedding.

    Chunks target 1000 characters with a 200-character overlap.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return text_splitter.split_text(text)


def get_vectorstore(text_chunks):
    """Embed ``text_chunks`` with OpenAI embeddings into a FAISS store."""
    embeddings = OpenAIEmbeddings()
    # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)


def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain backed by ``vectorstore``.

    The chain keeps the running dialogue in a ``ConversationBufferMemory``
    under the ``chat_history`` key.
    """
    llm = ChatOpenAI(temperature=0.3)
    # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )


def handle_userinput(user_question, myslot):
    """Send the question to the chain and render the chat history.

    Updates ``st.session_state.chat_history`` and ``st.session_state.sr``;
    ``sr`` is set to 0 when the answer contains one of the "could not
    answer" markers and to 1 otherwise.

    Args:
        user_question: Question text typed by the user.
        myslot: Streamlit placeholder in which the conversation is drawn.
    """
    conversation = st.session_state.conversation
    if conversation is None:
        # No documents processed yet: the chain does not exist, so calling
        # it would raise TypeError.
        with myslot.container():
            st.warning(
                "Please upload and process your documents before asking "
                "a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    answer = response.get('answer') or ""
    unanswered = any(marker in answer for marker in _UNANSWERED_MARKERS)
    st.session_state.sr = 0 if unanswered else 1

    with myslot.container():
        for i, msg in enumerate(st.session_state.chat_history):
            # History alternates user / bot messages, starting with the user.
            # A per-message key keeps repeated identical messages from
            # colliding as duplicate widgets.
            message(msg.content, is_user=(i % 2 == 0), key=f"chat_msg_{i}")


def create_jira_ticket(summary, description, project_key, issuetype_name):
    """Create a Jira issue through the REST API and return the JSON reply.

    Credentials are read from the ``JIRA_USER`` and ``JIRA_API_TOKEN``
    environment variables (empty strings when unset).

    Args:
        summary: Issue summary line.
        description: Issue description text; a generic default is used
            when it is empty.
        project_key: Key of the Jira project that receives the issue.
        issuetype_name: Name of the issue type, e.g. ``"Task"``.

    Returns:
        The decoded JSON body of the Jira response.

    Raises:
        requests.RequestException: On network failure or timeout.
    """
    url = "https://tnq.atlassian.net/rest/api/3/issue"
    auth = HTTPBasicAuth(
        os.getenv("JIRA_USER", ""),
        os.getenv("JIRA_API_TOKEN", ""),
    )
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
    }
    payload = {
        "fields": {
            "project": {"key": project_key},
            "summary": summary,
            "customfield_10044": [{"value": "Edit Central All"}],
            "description": {
                "type": "doc",
                "version": 1,
                "content": [
                    {
                        "type": "paragraph",
                        "content": [
                            {
                                "type": "text",
                                "text": description or _DEFAULT_JIRA_DESCRIPTION,
                            }
                        ],
                    }
                ],
            },
            "issuetype": {"name": issuetype_name},
        }
    }
    response = requests.post(
        url,
        json=payload,
        headers=headers,
        auth=auth,
        timeout=_JIRA_TIMEOUT_SECONDS,  # never hang the UI on a stalled call
    )
    return response.json()


def main():
    """Run the Streamlit page: chat area plus document-upload sidebar."""
    load_dotenv()
    st.set_page_config(page_title="AIusBOT", page_icon=":alien:")

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None
    if "sr" not in st.session_state:
        st.session_state.sr = 1

    st.header("AIusBOT :alien:")
    myslot = st.empty()
    user_question = st.text_input("Ask a question?")
    if user_question:
        handle_userinput(user_question, myslot)

    # NOTE(review): ticket creation from the UI is currently disabled.
    # if st.button("Create SR?", disabled=st.session_state.sr, type="primary"):
    #     jira_response = create_jira_ticket(
    #         summary=user_question,
    #         description=f"User question that did not receive a satisfactory answer: {user_question}",
    #         project_key="EC",
    #         issuetype_name="Task"
    #     )
    #     st.write(f"Ticket created: {jira_response.get('key')}")

    with st.sidebar:
        st.subheader("Your support Documents")
        pdf_docs = st.file_uploader(
            "Upload your Documents here and click on 'Process'",
            accept_multiple_files=True)
        if st.button("Process"):
            if not pdf_docs:
                st.warning("Please upload at least one document first.")
                return

            with st.spinner("Uploading the docs"):
                raw_text = get_uploaded_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                # An empty chunk list cannot be indexed by FAISS.
                if text_chunks:
                    vectorstore = get_vectorstore(text_chunks)
                    st.session_state.conversation = get_conversation_chain(
                        vectorstore)

            if text_chunks:
                st.toast("File Process Completed", icon='🎉')
            else:
                st.warning(
                    "No readable text was found in the uploaded documents.")


if __name__ == '__main__':
    main()