from dotenv import load_dotenv import pandas as pd import streamlit as st import streamlit_authenticator as stauth from streamlit_modal import Modal from utils import new_file, clear_memory, append_documentation_to_sidebar, load_authenticator_config, init_qa, \ append_header from haystack.document_stores.in_memory import InMemoryDocumentStore from haystack import Document load_dotenv() OPENAI_MODELS = ['gpt-3.5-turbo', "gpt-4", "gpt-4-1106-preview"] OPEN_MODELS = [ 'mistralai/Mistral-7B-Instruct-v0.1', 'HuggingFaceH4/zephyr-7b-beta' ] def reset_chat_memory(): st.button( 'Reset chat memory', key="reset-memory-button", on_click=clear_memory, help="Clear the conversational memory. Currently implemented to retain the 4 most recent messages.", disabled=False) def manage_files(modal, document_store): open_modal = st.sidebar.button("Manage Files", use_container_width=True) if open_modal: modal.open() if modal.is_open(): with modal.container(): uploaded_file = st.file_uploader( "Upload a CV in PDF format", type=("pdf",), on_change=new_file(), disabled=st.session_state['document_qa_model'] is None, label_visibility="collapsed", help="The document is used to answer your questions. The system will process the document and store it in a RAG to answer your questions.", ) edited_df = st.data_editor(use_container_width=True, data=st.session_state['files'], num_rows='dynamic', column_order=['name', 'size', 'is_active'], column_config={'name': {'editable': False}, 'size': {'editable': False}, 'is_active': {'editable': True, 'type': 'checkbox', 'width': 100}} ) st.session_state['files'] = pd.DataFrame(columns=['name', 'content', 'size', 'is_active']) if uploaded_file: st.session_state['file_uploaded'] = True st.session_state['files'] = pd.concat([st.session_state['files'], edited_df]) with st.spinner('Processing the CV content...'): store_file_in_table(document_store, uploaded_file) ingest_document(uploaded_file) def ingest_document(uploaded_file): if not st.session_state['document_qa_model']: st.warning('Please select a model to start asking questions') else: try: st.session_state['document_qa_model'].ingest_pdf(uploaded_file) st.success('Document processed successfully') except Exception as e: st.error(f"Error processing the document: {e}") st.session_state['file_uploaded'] = False def store_file_in_table(document_store, uploaded_file): pdf_content = uploaded_file.getvalue() st.session_state['pdf_content'] = pdf_content st.session_state.messages = [] document = Document(content=pdf_content, meta={"name": uploaded_file.name}) df = pd.DataFrame(st.session_state['files']) df['is_active'] = False st.session_state['files'] = pd.concat([df, pd.DataFrame( [{"name": uploaded_file.name, "content": pdf_content, "size": len(pdf_content), "is_active": True}])]) document_store.write_documents([document]) def init_session_state(): st.session_state.setdefault('files', pd.DataFrame(columns=['name', 'content', 'size', 'is_active'])) st.session_state.setdefault('models', []) st.session_state.setdefault('api_keys', {}) st.session_state.setdefault('current_selected_model', 'gpt-3.5-turbo') st.session_state.setdefault('current_api_key', '') st.session_state.setdefault('messages', []) st.session_state.setdefault('pdf_content', None) st.session_state.setdefault('memory', None) st.session_state.setdefault('pdf', None) st.session_state.setdefault('document_qa_model', None) st.session_state.setdefault('file_uploaded', False) def set_page_config(): st.set_page_config( page_title="CV Insights AI Assistant", page_icon=":shark:", initial_sidebar_state="expanded", layout="wide", menu_items={ 'Get Help': 'https://www.extremelycoolapp.com/help', 'Report a bug': "https://www.extremelycoolapp.com/bug", 'About': "# This is a header. This is an *extremely* cool app!" } ) def update_running_model(api_key, model): st.session_state['api_keys'][model] = api_key st.session_state['document_qa_model'] = init_qa(model, api_key) def init_api_key_dict(): st.session_state['models'] = OPENAI_MODELS + list(OPEN_MODELS) + ['local LLM'] for model_name in OPENAI_MODELS: st.session_state['api_keys'][model_name] = None def display_chat_messages(chat_box, chat_input): with chat_box: if chat_input: for message in st.session_state.messages: with st.chat_message(message["role"]): st.markdown(message["content"], unsafe_allow_html=True) st.chat_message("user").markdown(chat_input) with st.chat_message("assistant"): # process user input and generate response response = st.session_state['document_qa_model'].inference(chat_input, st.session_state.messages) st.markdown(response) st.session_state.messages.append({"role": "user", "content": chat_input}) st.session_state.messages.append({"role": "assistant", "content": response}) def setup_model_selection(): model = st.selectbox( "Model:", options=st.session_state['models'], index=0, # default to the first model in the list gpt-3.5-turbo placeholder="Select model", help="Select an LLM:" ) if model: if model != st.session_state['current_selected_model']: st.session_state['current_selected_model'] = model if model == 'local LLM': st.session_state['document_qa_model'] = init_qa(model) api_key = st.sidebar.text_input("Enter LLM-authorization Key:", type="password", disabled=st.session_state['current_selected_model'] == 'local LLM') if api_key and api_key != st.session_state['current_api_key']: update_running_model(api_key, model) st.session_state['current_api_key'] = api_key return model def setup_task_selection(model): # enable extractive and generative tasks if we're using a local LLM or an OpenAI model with an API key if model == 'local LLM' or st.session_state['api_keys'].get(model): task_options = ['Extractive', 'Generative'] else: task_options = ['Extractive'] task_selection = st.sidebar.radio('Select the task:', task_options) # TODO: Add the task selection logic here (initializing the model based on the task) def setup_page_body(): chat_box = st.container(height=350, border=False) chat_input = st.chat_input( placeholder="Upload a document to start asking questions...", disabled=not st.session_state['file_uploaded'], ) if st.session_state['file_uploaded']: display_chat_messages(chat_box, chat_input) class StreamlitApp: def __init__(self): self.authenticator_config = load_authenticator_config() self.document_store = InMemoryDocumentStore() set_page_config() self.authenticator = self.init_authenticator() init_session_state() init_api_key_dict() def init_authenticator(self): return stauth.Authenticate( self.authenticator_config['credentials'], self.authenticator_config['cookie']['name'], self.authenticator_config['cookie']['key'], self.authenticator_config['cookie']['expiry_days'] ) def setup_sidebar(self): with st.sidebar: st.sidebar.image("resources/ml_logo.png", use_column_width=True) # Sidebar for Task Selection st.sidebar.header('Options:') model = setup_model_selection() setup_task_selection(model) st.divider() self.authenticator.logout() reset_chat_memory() modal = Modal("Manage Files", key="demo-modal") manage_files(modal, self.document_store) st.divider() append_documentation_to_sidebar() def run(self): name, authentication_status, username = self.authenticator.login() if authentication_status: self.run_authenticated_app() elif st.session_state["authentication_status"] is False: st.error('Username/password is incorrect') elif st.session_state["authentication_status"] is None: st.warning('Please enter your username and password') def run_authenticated_app(self): self.setup_sidebar() append_header() setup_page_body() app = StreamlitApp() app.run()