import os

import gradio as gr
import openai
from dotenv import load_dotenv
from llama_index import (
    OpenAIEmbedding,
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.chat_engine.types import ChatMode  # used only by the commented-out chat engine below
from llama_index.ingestion import IngestionPipeline
from llama_index.text_splitter import SentenceSplitter
from llama_index.vector_stores.qdrant import QdrantVectorStore

from chat_template import CHAT_TEXT_QA_PROMPT
from chatbot import Chatbot, ChatbotVersion
from custom_io import UnstructuredReader, default_file_metadata_func
from qdrant import client as qdrantClient

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")


class AwesumCareChatbot(Chatbot):
    DENIED_ANSWER_PROMPT = ""
    SYSTEM_PROMPT = ""
    CHAT_EXAMPLES = [
        "什麼是安心三寶?",  # "What are the 'three treasures of peace of mind'?"
        "點樣立平安紙?",    # "How do I make a will?"
    ]

    # NOTE: the spelling "doucments" matches the hook name on the parent
    # Chatbot class, so it is kept as-is.
    def _load_doucments(self):
        dir_reader = SimpleDirectoryReader(
            './awesumcare_data',
            file_extractor={
                ".pdf": UnstructuredReader(),
                ".docx": UnstructuredReader(),
                # .pptx files are excluded below, so this extractor is
                # currently inactive.
                ".pptx": UnstructuredReader(),
            },
            recursive=True,
            exclude=["*.png", "*.pptx"],
            file_metadata=default_file_metadata_func)
        self.documents = dir_reader.load_data()
        super()._load_doucments()

    def _setup_service_context(self):
        self.service_context = ServiceContext.from_defaults(
            chunk_size=self.chunk_size,
            llm=self.llm,
            embed_model=self.embed_model)
        super()._setup_service_context()

    def _setup_vector_store(self):
        self.vector_store = QdrantVectorStore(
            client=qdrantClient,
            collection_name=self.vdb_collection_name)
        super()._setup_vector_store()

    def _setup_index(self):
        # Reuse the existing Qdrant collection if it is already populated.
        # vectors_count may be None on an empty collection, so fall back to 0
        # before comparing.
        existing_collections = [
            col.name for col in qdrantClient.get_collections().collections]
        if (self.vdb_collection_name in existing_collections
                and (qdrantClient.get_collection(
                    self.vdb_collection_name).vectors_count or 0) > 0):
            self.index = VectorStoreIndex.from_vector_store(
                self.vector_store, service_context=self.service_context)
            print("set up index from vector store")
            return

        # Otherwise, ingest the loaded documents into the vector store first.
        pipeline = IngestionPipeline(
            transformations=[
                # Pass chunk_size explicitly so chunking stays consistent
                # with the service context above.
                SentenceSplitter(chunk_size=self.chunk_size),
                OpenAIEmbedding(),
            ],
            vector_store=self.vector_store,
        )
        pipeline.run(documents=self.documents)
        self.index = VectorStoreIndex.from_vector_store(
            self.vector_store, service_context=self.service_context)
        super()._setup_index()

    # Alternative: build the index directly from the documents.
    # def _setup_index(self):
    #     self.index = VectorStoreIndex.from_documents(
    #         self.documents,
    #         service_context=self.service_context
    #     )
    #     super()._setup_index()

    def _setup_chat_engine(self):
        # testing: drive retrieval through an OpenAI agent instead of a
        # plain chat engine.
        from llama_index.agent import OpenAIAgent
        from llama_index.tools.query_engine import QueryEngineTool

        # similarity_top_k belongs on the query engine; it has no effect
        # when passed to OpenAIAgent.from_tools.
        query_engine = self.index.as_query_engine(
            similarity_top_k=1,
            text_qa_template=CHAT_TEXT_QA_PROMPT)
        query_engine_tool = QueryEngineTool.from_defaults(
            query_engine=query_engine)
        self.chat_engine = OpenAIAgent.from_tools(
            tools=[query_engine_tool],
            llm=self.service_context.llm,
            verbose=True)
        print("set up agent as chat engine")

        # Alternative: use the index's built-in chat engine.
        # self.chat_engine = self.index.as_chat_engine(
        #     chat_mode=ChatMode.BEST,
        #     similarity_top_k=5,
        #     text_qa_template=CHAT_TEXT_QA_PROMPT)
        super()._setup_chat_engine()


# Model options: gpt-3.5-turbo-1106, gpt-4-1106-preview
awesum_chatbot = AwesumCareChatbot(ChatbotVersion.CHATGPT_35.value,
                                   chunk_size=2048,
                                   vdb_collection_name="v2")


def vote(data: gr.LikeData):
    if data.liked:
        gr.Info("You up-voted this response: " + data.value)
    else:
        gr.Info("You down-voted this response: " + data.value)


chatbot = gr.Chatbot()

with gr.Blocks() as demo:
    gr.Markdown("# Awesum Care demo")
    with gr.Tab("With awesum care data prepared"):
        gr.ChatInterface(
            awesum_chatbot.stream_chat,
            chatbot=chatbot,
            examples=awesum_chatbot.CHAT_EXAMPLES,
        )
        chatbot.like(vote, None, None)
    with gr.Tab("With Initial System Prompt (a.k.a. prompt wrapper)"):
        gr.ChatInterface(
            awesum_chatbot.predict_with_prompt_wrapper,
            examples=awesum_chatbot.CHAT_EXAMPLES)
    with gr.Tab("Vanilla ChatGPT without modification"):
        gr.ChatInterface(awesum_chatbot.predict_vanilla_chatgpt,
                         examples=awesum_chatbot.CHAT_EXAMPLES)

demo.queue()
demo.launch()