hoshingakag committed on
Commit ecbd714
1 Parent(s): e97bd62
Files changed (2):
  1. app.py +69 -170
  2. src/llamaindex_palm.py +302 -8
app.py CHANGED
@@ -1,28 +1,18 @@
- import os
- import time
- import datetime

  import gradio as gr

- import google.generativeai as genai
- from src.llamaindex_palm import LlamaIndexPaLM
-
- import wandb
- from wandb.sdk.data_types.trace_tree import Trace
-
  import logging
- logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S %p', level=logging.INFO)
- logger = logging.getLogger('llm')
-
- # Llama-Index LLM
- llm = LlamaIndexPaLM()
- llm.set_index_from_pinecone()

- # Credentials
- genai.configure(api_key=os.getenv('PALM_API_KEY'))

- # W&B
- wandb.init(project=os.getenv('WANDB_PROJECT'))

  # Gradio
  chat_history = []
@@ -32,7 +22,7 @@ def clear_chat() -> None:
      chat_history = []
      return None

- def get_chat_history(chat_history) -> str:
      ind = 0
      formatted_chat_history = ""
      for message in chat_history:
@@ -40,162 +30,71 @@ def get_chat_history(chat_history) -> str:
          ind += 1
      return formatted_chat_history

- def generate_chat(prompt: str, llamaindex_llm: LlamaIndexPaLM):
      global chat_history
-     # get chat history
-     context_chat_history = "\n".join(list(filter(None, chat_history)))

      logger.info("Generating Message...")
      logger.info(f"User Message:\n{prompt}\n")
-     chat_history.append(prompt)

-     # w&b trace start
-     start_time_ms = round(datetime.datetime.now().timestamp() * 1000)
-
-     root_span = Trace(
-         name="LLMChain",
-         kind="chain",
-         start_time_ms=start_time_ms,
-         metadata={"user": "Gradio"},
-     )
-
-     # get context
-     context_from_index = llamaindex_llm.generate_response(prompt)
-     logger.info(f"Context from Llama-Index:\n{context_from_index}\n")
-
-     # w&b trace agent
-     agent_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
-     agent_span = Trace(
-         name="Agent",
-         kind="agent",
-         status_code="success",
-         metadata={
-             "framework": "Llama-Index",
-             "index_type": "VectorStoreIndex",
-             "vector_store": "Pinecone",
-             "vector_store_index": llamaindex_llm._index_name,
-             "vector_store_namespace": llamaindex_llm._index_namespace,
-             "model_name": llamaindex_llm.llm._model_name,
-             # "temperture": 0.7,
-             # "top_k": 40,
-             # "top_p": 0.95,
-             "custom_kwargs": llamaindex_llm.llm._model_kwargs,
-         },
-         start_time_ms=start_time_ms,
-         end_time_ms=agent_end_time_ms,
-         inputs={"query": prompt},
-         outputs={"response": context_from_index},
-     )
-     root_span.add_child(agent_span)
-
-     prompt_with_context = f"""
-     [System]
-     You are in a role play of Gerard Lee and you need to pretend to be him to answer questions from people who interested in Gerard's background.
-     Respond the User Query below in no more than 5 complete sentences, unless specifically asked by the user to elaborate on something. Use only the History and Context to inform your answers.
-
-     [History]
-     {context_chat_history}
-
-     [Context]
-     {context_from_index}

-     [User Query]
-     {prompt}
-     """

-     try:
-         response = genai.generate_text(
-             prompt=prompt_with_context,
-             safety_settings=[
-                 {
-                     'category': genai.types.HarmCategory.HARM_CATEGORY_UNSPECIFIED,
-                     'threshold': genai.types.HarmBlockThreshold.BLOCK_NONE,
-                 },
-             ],
-             temperature=0.9,
-         )
-         result = response.result
-         success_flag = "success"
-         if result is None:
-             result = "Seems something went wrong. Please try again later."
-             logger.error(f"Result with 'None' received\n")
-             success_flag = "fail"
      except Exception as e:
-         result = "Seems something went wrong. Please try again later."
-         logger.error(f"Exception {e} occured\n")
-         success_flag = "fail"
-
-     chat_history.append(result)
-     logger.info(f"Bot Message:\n{result}\n")
-
-     # w&b trace llm
-     llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
-     llm_span = Trace(
-         name="LLM",
-         kind="llm",
-         status_code=success_flag,
-         start_time_ms=agent_end_time_ms,
-         end_time_ms=llm_end_time_ms,
-         inputs={"input": prompt_with_context},
-         outputs={"result": result},
-     )
-     root_span.add_child(llm_span)
-
-     # w&b finalize trace
-     root_span.add_inputs_and_outputs(
-         inputs={"query": prompt}, outputs={"result": result}
-     )
-     root_span._span.end_time_ms = llm_end_time_ms
-     root_span.log(name="llm_app_trace")
-
-     return result
-
- with gr.Blocks() as app:
-     chatbot = gr.Chatbot(
-         bubble_full_width=False,
-         container=True,
-         show_share_button=False,
-         avatar_images=[None, './asset/akag-g-only.png']
-     )
-     msg = gr.Textbox(
-         show_label=False,
-         label="Type your message...",
-         placeholder="Hi Gerard, can you introduce yourself?",
-         container=False,
-     )
-     with gr.Row():
-         clear = gr.Button("Clear", scale=1)
-         send = gr.Button(
-             value="",
-             variant="primary",
-             icon="./asset/send-message.png",
-             scale=1
-         )
-
-     def user(user_message, history):
-         return "", history + [[user_message, None]]
-
-     def bot(history):
-         bot_message = generate_chat(history[-1][0], llm)
-         history[-1][1] = ""
-         for character in bot_message:
-             history[-1][1] += character
-             time.sleep(0.01)
-             yield history
-
-     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-         bot, chatbot, chatbot
-     )
-     send.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
-         bot, chatbot, chatbot
-     )
-     clear.click(clear_chat, None, chatbot, queue=False)
-
-     gr.HTML("""
-         <p><center><i>Disclaimer: This is a RAG app for demostration purpose. LLM hallucination might occur.</i></center></p>
-         <p><center>Hosted on 🤗 Spaces. Powered by Google PaLM 🌴</center></p>
-     """)
-
- app.queue()
- app.launch()
+ from src.llamaindex_palm import LlamaIndexPaLM, LlamaIndexPaLMText

  import gradio as gr

+ from typing import List
+ import time

  import logging

+ # import dotenv
+ # dotenv.load_dotenv(".env")

+ # Llama-Index LLM
+ llm_backend = LlamaIndexPaLMText(model_kwargs={'temperature': 0.8})
+ llm = LlamaIndexPaLM(model=llm_backend)
+ llm.get_index_from_pinecone()

  # Gradio
  chat_history = []

      chat_history = []
      return None

+ def get_chat_history(chat_history: List[str]) -> str:
      ind = 0
      formatted_chat_history = ""
      for message in chat_history:

          ind += 1
      return formatted_chat_history

+ def generate_text(prompt: str, llamaindex_llm: LlamaIndexPaLM):
      global chat_history

      logger.info("Generating Message...")
      logger.info(f"User Message:\n{prompt}\n")

+     result = llamaindex_llm.generate_text(prompt, chat_history)
+     chat_history.append(prompt)
+     chat_history.append(result)

+     logger.info(f"Replied Message:\n{result}\n")
+     return result

+ if __name__ == "__main__":
+     logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S %p', level=logging.INFO)
+     logger = logging.getLogger('app')

+     try:
+         with gr.Blocks() as app:
+             chatbot = gr.Chatbot(
+                 bubble_full_width=False,
+                 container=True,
+                 show_share_button=False,
+                 avatar_images=[None, './asset/akag-g-only.png']
+             )
+             msg = gr.Textbox(
+                 show_label=False,
+                 label="Type your message...",
+                 placeholder="Hi Gerard, can you introduce yourself?",
+                 container=False,
+             )
+             with gr.Row():
+                 clear = gr.Button("Clear", scale=1)
+                 send = gr.Button(
+                     value="",
+                     variant="primary",
+                     icon="./asset/send-message.png",
+                     scale=1
+                 )
+
+             def user(user_message, history):
+                 return "", history + [[user_message, None]]
+
+             def bot(history):
+                 bot_message = generate_text(history[-1][0], llm)
+                 history[-1][1] = ""
+                 for character in bot_message:
+                     history[-1][1] += character
+                     time.sleep(0.01)
+                     yield history
+
+             msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                 bot, chatbot, chatbot
+             )
+             send.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+                 bot, chatbot, chatbot
+             )
+             clear.click(clear_chat, None, chatbot, queue=False)
+
+             gr.HTML("""
+                 <p><center><i>Disclaimer: This is a RAG app for demonstration purposes. LLM hallucination might occur.</i></center></p>
+                 <p><center>Hosted on 🤗 Spaces. Powered by Google PaLM 🌴</center></p>
+             """)
+
+         app.queue()
+         app.launch()
      except Exception as e:
+         logger.exception(e)
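
With tracing and prompt assembly moved into `LlamaIndexPaLM`, the Gradio layer above is now a thin wrapper: `generate_text()` logs the turn, delegates to `llamaindex_llm.generate_text(prompt, chat_history)`, and appends both sides of the exchange to `chat_history`. A minimal sketch of the same call path without the UI (hypothetical driver, not part of this commit; assumes the `PALM_API_KEY`, `PINECONE_*`, and `WANDB_PROJECT` environment variables the class reads are set):

```python
# Hypothetical console driver exercising the same entry point as the Gradio bot() handler.
from src.llamaindex_palm import LlamaIndexPaLM, LlamaIndexPaLMText

llm = LlamaIndexPaLM(model=LlamaIndexPaLMText(model_kwargs={'temperature': 0.8}))
llm.get_index_from_pinecone()

chat_history = []
while True:
    prompt = input("You: ").strip()
    if not prompt:
        break
    # retrieval, evaluation, PaLM call, and W&B trace all happen inside this call
    result = llm.generate_text(prompt, chat_history)
    chat_history.extend([prompt, result])
    print(f"Gerard: {result}")
```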
src/llamaindex_palm.py CHANGED
@@ -1,9 +1,14 @@
  import os
- import logging

- from typing import Any, List
  from pydantic import Extra

  import pinecone
  import google.generativeai as genai

@@ -25,6 +30,25 @@ from llama_index.llms import (
  )
  from llama_index.llms.base import llm_completion_callback

  class LlamaIndexPaLMEmbeddings(BaseEmbedding, extra=Extra.allow):
      def __init__(
          self,
@@ -114,11 +138,13 @@ class LlamaIndexPaLM():
      def __init__(
          self,
          emb_model: LlamaIndexPaLMEmbeddings = LlamaIndexPaLMEmbeddings(),
-         model: LlamaIndexPaLMText = LlamaIndexPaLMText()
      ) -> None:
          self.emb_model = emb_model
          self.llm = model
-
          # Google Generative AI
          genai.configure(api_key=os.environ['PALM_API_KEY'])

@@ -128,6 +154,9 @@ class LlamaIndexPaLM():
              environment=os.getenv('PINECONE_ENV')
          )

          # model metadata
          CONTEXT_WINDOW = os.getenv('CONTEXT_WINDOW', 8196)
          NUM_OUTPUT = os.getenv('NUM_OUTPUT', 1024)
@@ -156,7 +185,13 @@ class LlamaIndexPaLM():
              prompt_helper=self.prompt_helper,
          )

-     def set_index_from_pinecone(
          self,
          index_name: str = os.getenv('PINECONE_INDEX'),
          index_namespace: str = os.getenv('PINECONE_NAMESPACE')
@@ -168,10 +203,269 @@ class LlamaIndexPaLM():
          self._index_name = index_name
          self._index_namespace = index_namespace
          return None

-     def generate_response(
          self,
          query: str
      ) -> str:
-         response = self.pinecone_index.as_query_engine(similarity_top_k=3,).query(query)
-         return response.response
  import os
+ import datetime
+ import asyncio
+ from concurrent.futures import ThreadPoolExecutor

+ from typing import Any, List, Dict, Union
  from pydantic import Extra

+ import wandb
+ from wandb.sdk.data_types.trace_tree import Trace
+
  import pinecone
  import google.generativeai as genai

  )
  from llama_index.llms.base import llm_completion_callback

+ from llama_index.evaluation import SemanticSimilarityEvaluator
+ from llama_index.embeddings import SimilarityMode
+
+ import logging
+ logging.basicConfig(format='%(asctime)s %(message)s', datefmt='%Y-%m-%d %I:%M:%S %p', level=logging.INFO)
+ logger = logging.getLogger('llm')
+
+ prompt_template = """
+ [System]
+ You are in a role play of Gerard Lee.
+ Reply in no more than 7 complete sentences using content from [Context] only. Refer to [History] for seamless conversation.
+
+ [History]
+ {context_history}
+
+ [Context]
+ {context_from_index}
+
+ [User Query]
+ {user_query}
+ """
+
  class LlamaIndexPaLMEmbeddings(BaseEmbedding, extra=Extra.allow):
      def __init__(
          self,

      def __init__(
          self,
          emb_model: LlamaIndexPaLMEmbeddings = LlamaIndexPaLMEmbeddings(),
+         model: LlamaIndexPaLMText = LlamaIndexPaLMText(),
+         # prompt_template: str = prompt_template
      ) -> None:
          self.emb_model = emb_model
          self.llm = model
+         self.prompt_template = prompt_template
+
          # Google Generative AI
          genai.configure(api_key=os.environ['PALM_API_KEY'])

              environment=os.getenv('PINECONE_ENV')
          )

+         # W&B
+         wandb.init(project=os.getenv('WANDB_PROJECT'))
+
          # model metadata
          CONTEXT_WINDOW = os.getenv('CONTEXT_WINDOW', 8196)
          NUM_OUTPUT = os.getenv('NUM_OUTPUT', 1024)

              prompt_helper=self.prompt_helper,
          )

+         self.emd_evaluator = SemanticSimilarityEvaluator(
+             service_context=self.service_context,
+             similarity_mode=SimilarityMode.DEFAULT,
+             similarity_threshold=os.getenv('SIMILARITY_THRESHOLD', 0.7),
+         )
+
+     def get_index_from_pinecone(
          self,
          index_name: str = os.getenv('PINECONE_INDEX'),
          index_namespace: str = os.getenv('PINECONE_NAMESPACE')

          self._index_name = index_name
          self._index_namespace = index_namespace
          return None
+
+     def retrieve_context(
+         self,
+         query: str
+     ) -> Dict[str, Union[str, int]]:
+         start_time = round(datetime.datetime.now().timestamp() * 1000)
+         response = self.pinecone_index.as_query_engine(similarity_top_k=3).query(query)
+         end_time = round(datetime.datetime.now().timestamp() * 1000)
+         return {"result": response.response, "start": start_time, "end": end_time}

+     async def aretrieve_context(
          self,
          query: str
+     ) -> Dict[str, Union[str, int]]:
+         start_time = round(datetime.datetime.now().timestamp() * 1000)
+         response = await self.pinecone_index.as_query_engine(similarity_top_k=3, use_async=True).aquery(query)
+         end_time = round(datetime.datetime.now().timestamp() * 1000)
+         return {"result": response.response, "start": start_time, "end": end_time}
+
+     async def aretrieve_context_multi(
+         self,
+         query_list: List[str]
+     ) -> List[Dict]:
+         result = await asyncio.gather(*(self.aretrieve_context(query) for query in query_list))
+         return result
+
+     async def aevaluate_context(
+         self,
+         query: str,
+         returned_context: str
+     ) -> Dict[str, Any]:
+         result = await self.emd_evaluator.aevaluate(
+             response=returned_context,
+             reference=query,
+         )
+         return result
+
+     async def aevaluate_context_multi(
+         self,
+         query_list: List[str],
+         returned_context_list: List[str]
+     ) -> List[Dict]:
+         result = await asyncio.gather(*(self.aevaluate_context(query, returned_context) for query, returned_context in zip(query_list, returned_context_list)))
+         return result
+
+     def format_history_as_context(
+         self,
+         history: List[str],
+     ) -> str:
+         format_chat_history = "\n".join(list(filter(None, history)))
+         return format_chat_history
+
+     def generate_text(
+         self,
+         query: str,
+         history: List[str],
      ) -> str:
+         # get history
+         context_history = self.format_history_as_context(history=history)
+
+         # w&b trace start
+         start_time_ms = round(datetime.datetime.now().timestamp() * 1000)
+         root_span = Trace(
+             name="MetaAgent",
+             kind="agent",
+             start_time_ms=start_time_ms,
+             metadata={"user": "🤗 Space"},
+         )
+
+         # get retrieval context(s) from llama-index vectorstore index
+         # w&b trace retrieval & select agent
+         agent_span = Trace(
+             name="LlamaIndexAgent",
+             kind="agent",
+             start_time_ms=start_time_ms,
+         )
+         try:
+             # No history: single context retrieval without evaluation
+             if not history:
+                 # w&b trace retrieval context
+                 result_query_only = self.retrieve_context(query)
+                 # async version
+                 # result_query_only = asyncio.run(self.retrieve_context(query))
+                 context_from_index_selected = result_query_only["result"]
+                 agent_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
+                 retrieval_span = Trace(
+                     name="QueryRetrieval",
+                     kind="chain",
+                     status_code="success",
+                     metadata={
+                         "framework": "Llama-Index",
+                         "index_type": "VectorStoreIndex",
+                         "vector_store": "Pinecone",
+                         "vector_store_index": self._index_name,
+                         "vector_store_namespace": self._index_namespace,
+                         "model_name": self.llm._model_name,
+                         "custom_kwargs": self.llm._model_kwargs,
+                     },
+                     start_time_ms=start_time_ms,
+                     end_time_ms=agent_end_time_ms,
+                     inputs={"query": query},
+                     outputs={"response": context_from_index_selected},
+                 )
+                 agent_span.add_child(retrieval_span)
+             # Has history: retrieve context for both queries concurrently, then evaluate to determine which context to use
+             else:
+                 extended_query = f"[History]\n{history[-1]}\n[New Query]\n{query}"
+
+                 # thread version
+                 with ThreadPoolExecutor(2) as executor:
+                     results = executor.map(self.retrieve_context, [query, extended_query])
+                     result_query_only, result_extended_query = [rec for rec in results]
+
+                 # async version - not working
+                 # result_query_only, result_extended_query = asyncio.run(
+                 #     self.aretrieve_context_multi([query, extended_query])
+                 # )
+
+                 # w&b trace retrieval context query only
+                 retrieval_query_span = Trace(
+                     name="QueryRetrieval",
+                     kind="chain",
+                     status_code="success",
+                     metadata={
+                         "framework": "Llama-Index",
+                         "index_type": "VectorStoreIndex",
+                         "vector_store": "Pinecone",
+                         "vector_store_index": self._index_name,
+                         "vector_store_namespace": self._index_namespace,
+                         "model_name": self.llm._model_name,
+                         "custom_kwargs": self.llm._model_kwargs,
+                         "start_time": result_query_only["start"],
+                         "end_time": result_query_only["end"],
+                     },
+                     start_time_ms=result_query_only["start"],
+                     end_time_ms=result_query_only["end"],
+                     inputs={"query": query},
+                     outputs={"response": result_query_only["result"]},
+                 )
+                 agent_span.add_child(retrieval_query_span)
+
+                 # w&b trace retrieval context extended query
+                 retrieval_extended_query_span = Trace(
+                     name="ExtendedQueryRetrieval",
+                     kind="chain",
+                     status_code="success",
+                     metadata={
+                         "framework": "Llama-Index",
+                         "index_type": "VectorStoreIndex",
+                         "vector_store": "Pinecone",
+                         "vector_store_index": self._index_name,
+                         "vector_store_namespace": self._index_namespace,
+                         "model_name": self.llm._model_name,
+                         "custom_kwargs": self.llm._model_kwargs,
+                         "start_time": result_extended_query["start"],
+                         "end_time": result_extended_query["end"],
+                     },
+                     start_time_ms=result_extended_query["start"],
+                     end_time_ms=result_extended_query["end"],
+                     inputs={"query": extended_query},
+                     outputs={"response": result_extended_query["result"]},
+                 )
+                 agent_span.add_child(retrieval_extended_query_span)
+
+                 # w&b trace select context
+                 eval_start_time_ms = round(datetime.datetime.now().timestamp() * 1000)
+                 eval_context_query_only, eval_context_extended_query = asyncio.run(
+                     self.aevaluate_context_multi([query, extended_query], [result_query_only["result"], result_extended_query["result"]])
+                 )
+
+                 if eval_context_query_only.score > eval_context_extended_query.score:
+                     query_selected, context_from_index_selected = query, result_query_only["result"]
+                 else:
+                     query_selected, context_from_index_selected = extended_query, result_extended_query["result"]
+
+                 agent_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
+                 eval_span = Trace(
+                     name="EmbeddingsEvaluator",
+                     kind="tool",
+                     status_code="success",
+                     metadata={
+                         "framework": "Llama-Index",
+                         "evaluator": "SemanticSimilarityEvaluator",
+                         "similarity_mode": "DEFAULT",
+                         "similarity_threshold": 0.7,
+                         "similarity_results": {
+                             "eval_context_query_only": eval_context_query_only,
+                             "eval_context_extended_query": eval_context_extended_query,
+                         },
+                         "model_name": self.emb_model._model_name,
+                     },
+                     start_time_ms=eval_start_time_ms,
+                     end_time_ms=agent_end_time_ms,
+                     inputs={"query": query_selected},
+                     outputs={"response": context_from_index_selected},
+                 )
+                 agent_span.add_child(eval_span)
+
+         except Exception as e:
+             logger.error(f"Exception {e} occurred when retrieving context\n")
+
+             llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
+             result = "Something went wrong. Please try again later."
+             root_span.add_inputs_and_outputs(
+                 inputs={"query": query}, outputs={"result": result, "exception": e}
+             )
+             root_span._span.status_code = "fail"
+             root_span._span.end_time_ms = llm_end_time_ms
+             root_span.log(name="llm_app_trace")
+             return result
+
+         logger.info(f"Context from Llama-Index:\n{context_from_index_selected}\n")
+
+         agent_span.add_inputs_and_outputs(
+             inputs={"query": query}, outputs={"result": context_from_index_selected}
+         )
+         agent_span._span.status_code = "success"
+         agent_span._span.end_time_ms = agent_end_time_ms
+         root_span.add_child(agent_span)
+
+         # generate text with prompt template to roleplay myself
+         prompt_with_context = self.prompt_template.format(context_history=context_history, context_from_index=context_from_index_selected, user_query=query)
+         try:
+             response = genai.generate_text(
+                 prompt=prompt_with_context,
+                 safety_settings=[
+                     {
+                         'category': genai.types.HarmCategory.HARM_CATEGORY_UNSPECIFIED,
+                         'threshold': genai.types.HarmBlockThreshold.BLOCK_NONE,
+                     },
+                 ],
+                 temperature=0.9,
+             )
+             result = response.result
+             success_flag = "success"
+             if result is None:
+                 result = "Seems something went wrong. Please try again later."
+                 logger.error(f"Result with 'None' received\n")
+                 success_flag = "fail"
+
+         except Exception as e:
+             result = "Seems something went wrong. Please try again later."
+             logger.error(f"Exception {e} occurred\n")
+             success_flag = "fail"
+
+         # w&b trace llm
+         llm_end_time_ms = round(datetime.datetime.now().timestamp() * 1000)
+         llm_span = Trace(
+             name="LLM",
+             kind="llm",
+             status_code=success_flag,
+             start_time_ms=agent_end_time_ms,
+             end_time_ms=llm_end_time_ms,
+             inputs={"input": prompt_with_context},
+             outputs={"result": result},
+         )
+         root_span.add_child(llm_span)
+
+         # w&b finalize trace
+         root_span.add_inputs_and_outputs(
+             inputs={"query": query}, outputs={"result": result}
+         )
+         root_span._span.end_time_ms = llm_end_time_ms
+         root_span.log(name="llm_app_trace")
+
+         return result
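
The heart of the new `generate_text()` is context selection: with no history it retrieves once, and with history it retrieves for both the raw query and a history-extended query on a two-worker thread pool (the asyncio variant is left commented out as not working), then scores each candidate context against its originating query with `SemanticSimilarityEvaluator` and keeps the higher-scoring one. A distilled sketch of that decision, stripped of the W&B tracing (hypothetical standalone helper named `select_context`, not a method in this commit; it calls only `retrieve_context()` and `aevaluate_context_multi()` as defined above):

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import List

def select_context(llm, query: str, history: List[str]) -> str:
    # no history: single retrieval, no evaluation step
    if not history:
        return llm.retrieve_context(query)["result"]

    # two candidate queries: the raw query, and one extended with the last turn
    extended_query = f"[History]\n{history[-1]}\n[New Query]\n{query}"

    # fan out both retrievals on threads, mirroring the commit
    with ThreadPoolExecutor(2) as executor:
        result_plain, result_extended = list(
            executor.map(llm.retrieve_context, [query, extended_query])
        )

    # embedding similarity of each retrieved context to its query
    eval_plain, eval_extended = asyncio.run(
        llm.aevaluate_context_multi(
            [query, extended_query],
            [result_plain["result"], result_extended["result"]],
        )
    )

    # keep whichever context is semantically closer to its query
    if eval_plain.score > eval_extended.score:
        return result_plain["result"]
    return result_extended["result"]
```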