# Source snapshot: revision caa0e13, file size 11,536 bytes (web-viewer header and line-number gutter removed).
#@title Model Loading
from utility_func import get_history_from_prompt, get_latest_user_input_from_prompt, get_top_intents, create_embedding
from intents import intents, intents_sentence_similarity_en, chatbot_intents
from prompt import prompt_template
import flows
import os
import gradio as gr
import pandas as pd
import langchain
from langchain import PromptTemplate, LLMChain
from langchain.chat_models import ChatOpenAI
from datetime import date
import numpy as np
from openai import OpenAI
import time
# Pre-computed embeddings for the known intents, read once at import time.
# NOTE(review): presumably one row per entry of `intents` — confirm against
# the script that produced 'new_embed_2.npy'.
openai_intents_embedding = np.load('new_embed_2.npy')

# Module-level placeholders. Nothing visible in this file assigns to them
# afterwards; classify_intent builds its own local objects.
llm = None
llm_chain = None
def cosine_similarity(a, b):
    """Return the cosine similarity between ``a`` and the vector ``b``.

    ``a`` may be a single vector or a matrix whose rows are vectors; for a
    matrix, one score per row is returned.

    The norm of ``a`` is taken along its last axis so that every row is
    normalised by its own length. Calling ``np.linalg.norm`` on the whole
    matrix would instead produce one Frobenius norm and scale all scores by
    the same constant, which is not a cosine similarity.

    Args:
        a: 1-D vector of length d, or 2-D array of shape (n, d).
        b: 1-D vector of length d.

    Returns:
        A scalar when ``a`` is 1-D, otherwise an array of shape (n,).
        Zero-length vectors are not special-cased (NumPy division rules apply).
    """
    return np.dot(a, b) / (np.linalg.norm(a, axis=-1) * np.linalg.norm(b))
def get_embedding(text, api_key=None, model="text-embedding-ada-002"):
    """Request an embedding for ``text`` from the OpenAI embeddings endpoint.

    Args:
        text: String to embed. Newlines are replaced with spaces before the
            request is sent.
        api_key: OpenAI API key. ``None`` or an empty string (what an empty
            UI textbox yields) is forwarded as ``None`` so the OpenAI client
            applies its own key lookup (the ``OPENAI_API_KEY`` environment
            variable, per the client documentation).
        model: Name of the embedding model.

    Returns:
        The ``embedding`` field of the first item in the response.
    """
    # A new client is created per call because the key is supplied per request
    # by the UI rather than configured once for the process.
    client = OpenAI(api_key=api_key or None)
    text = text.replace("\n", " ")
    response = client.embeddings.create(input=[text], model=model)
    return response.data[0].embedding
def raw_inference(input, recv_state, n_samples, threshold, api_key):
    """Rank the known intents against one chat message by embedding similarity.

    Args:
        input: Chat text to classify.
        recv_state: Key into ``flows.STATE_FLOWS_MAP``; the mapped value is
            passed to ``get_top_intents`` as ``flow``.
        n_samples: Forwarded to ``get_top_intents`` as ``n``.
        threshold: Forwarded to ``get_top_intents`` as ``threshold``.
        api_key: OpenAI API key used for the embedding request.

    Returns:
        A tuple of the ``get_top_intents`` result and a visible
        "Ask intent with Language Model" button (used to reveal that button
        in the UI once a similarity result exists).

    Raises:
        KeyError: If ``recv_state`` is not a key of ``flows.STATE_FLOWS_MAP``.
    """
    state = flows.STATE_FLOWS_MAP[recv_state]

    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()
    query_embedding = get_embedding(input, api_key)
    similarity = cosine_similarity(openai_intents_embedding, query_embedding)
    result = get_top_intents(intents, similarity, n=n_samples, threshold=threshold, flow=state)
    latency = time.perf_counter() - started

    # print() does not apply %-formatting to its arguments, so the value is
    # interpolated explicitly.
    print(f"latency: {latency:.3f}s")
    return result, gr.Button("Ask intent with Language Model", visible=True)
def process_csv(files):
    """Load an uploaded CSV into the global ``df`` and reveal the CSV widgets.

    The CSV is reduced to rows whose ``chatbot_response`` is one of the known
    ``intents``, restricted to the four columns the demo uses, stripped of
    rows with missing values and re-indexed from 0. The first remaining row
    is used to pre-fill the read-only text boxes.

    Args:
        files: Value delivered by the upload button; passed unchanged to
            ``pd.read_csv`` and echoed back to the file display.

    Returns:
        A 7-tuple matching the outputs wired to the upload event: hidden
        upload button, the uploaded file, index slider, input chat box,
        state box, ground-truth box and history box.

    Raises:
        gr.Error: If no row survives the filtering.
    """
    global df
    required_columns = ["user_message", "prompt", "chatbot_response", "state"]

    frame = pd.read_csv(files, low_memory=False)
    frame = frame[frame['chatbot_response'].isin(intents)]
    # drop=True discards the old index instead of keeping it as an extra
    # 'index' column.
    frame = frame[required_columns].dropna().reset_index(drop=True)
    if frame.empty:
        raise gr.Error("No usable rows in the CSV: every row needs user_message, prompt, chatbot_response and state, and chatbot_response must be a known intent.")

    # Publish only a fully prepared frame so other handlers never see a
    # half-processed one.
    df = frame
    df_length = len(df.index)

    first_row = df.iloc[0]
    chat = get_latest_user_input_from_prompt(first_row["prompt"])
    history = get_history_from_prompt(first_row["prompt"])
    # The raw state name is shown (as update_index does); raw_inference looks
    # it up in flows.STATE_FLOWS_MAP itself.
    state = first_row['state']
    label = first_row['chatbot_response']

    return (gr.UploadButton("Upload CSV...", file_types=["file"], file_count="single", visible=False),
            files,
            # The slider value is used directly as a positional row index by
            # update_index, so its range is 0 .. len(df) - 1.
            gr.Slider(0, df_length - 1, value=0, step=1, visible=True, label="Index", info="Select which index of data to check the intents"),
            gr.Textbox(label="Input Chat", info="Input in index", visible=True, value=chat, interactive=False),
            gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=True, value=state, interactive=False),
            gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=True, value=label, interactive=False),
            gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=True, value=history, interactive=False))
def update_index(index):
    """Refresh the read-only CSV text boxes for the selected row of ``df``.

    Args:
        index: Slider value; converted with ``int`` and used as a positional
            row index into the global ``df`` set by ``process_csv``.

    Returns:
        A 4-tuple of text boxes: input chat, state, ground truth, history.
    """
    # Clamp to the valid positional range so a slider position equal to
    # len(df) shows the last row instead of raising IndexError.
    position = min(max(int(index), 0), len(df) - 1)
    row = df.iloc[position]

    chat = get_latest_user_input_from_prompt(row["prompt"])
    history = get_history_from_prompt(row["prompt"])
    state = row['state']
    label = row['chatbot_response']
    return (gr.Textbox(label="Input Chat", info="Input in index", visible=True, value=chat, interactive=False),
            gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=True, value=state, interactive=False),
            gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=True, value=label, interactive=False),
            gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=True, value=history, interactive=False))
def check_accuracy(n_samples, threshold, api_key=None):
    """Measure how often the similarity ranking contains the CSV label.

    For every row of the global ``df`` the latest user message is embedded,
    ranked against the intent embeddings, and counted as a hit when the row's
    ``chatbot_response`` is contained in any returned item.

    Args:
        n_samples: Forwarded to ``get_top_intents`` as ``n``.
        threshold: Forwarded to ``get_top_intents`` as ``threshold``.
        api_key: OpenAI API key forwarded to ``get_embedding``. With ``None``
            the OpenAI client falls back to its own key lookup (the
            ``OPENAI_API_KEY`` environment variable, per the client docs).

    Returns:
        ``(score, grouped_data)``: the overall hit percentage and a DataFrame
        with per-label ``sum``, ``count`` and ``percentage`` columns.

    Raises:
        gr.Error: If no CSV has been loaded yet or it holds no rows.
    """
    # df only exists after process_csv has run, so look it up defensively
    # instead of failing with NameError.
    frame = globals().get("df")
    if frame is None or frame.empty:
        raise gr.Error("Upload a CSV file with at least one usable row before calculating accuracy.")

    res_list = []
    for _, row in frame.iterrows():
        # Only the latest user turn is embedded, not the whole chat history.
        chat = get_latest_user_input_from_prompt(row["prompt"])
        query_embedding = get_embedding(chat, api_key)
        flow = flows.STATE_FLOWS_MAP[row['state']]
        similarity = cosine_similarity(openai_intents_embedding, query_embedding)
        result = get_top_intents(intents, similarity, n=n_samples, threshold=threshold, flow=flow)
        label = row['chatbot_response']
        is_hit = int(any(label in item for item in result))
        res_list.append({'state': row['state'], 'gt': label, 'isPredictedTrue': is_hit})
    res_df = pd.DataFrame(res_list)

    # Per-label hit counts and percentages.
    grouped_data = res_df.groupby('gt')['isPredictedTrue'].agg(['sum', 'count']).reset_index()
    grouped_data['percentage'] = (grouped_data['sum'] / grouped_data['count']) * 100

    # Overall hit percentage across all rows.
    score = (res_df['isPredictedTrue'] == 1).sum() / res_df['isPredictedTrue'].count() * 100
    print(score, grouped_data)
    return score, grouped_data
def classify_intent(input_text:str, history:str, answer, model_name, api_key):
    """Ask a chat model to choose an intent among the similarity candidates.

    Args:
        input_text: Chat message to classify.
        history: Chat history or summary inserted into the prompt.
        answer: Sequence of candidates; element 0 of each candidate is an
            intent name that must be a key of ``chatbot_intents``.
        model_name: Chat model name passed to ``ChatOpenAI``.
        api_key: OpenAI API key passed to ``ChatOpenAI``.

    Returns:
        ``(predicted_intent, prompt_result)``: the chain output and the fully
        formatted prompt text.
    """
    print(f"predicting with llm... date: {date.today()}")
    print(f"model name: {model_name}")

    chat_model = ChatOpenAI(model=model_name, temperature='0.1', openai_api_key=api_key)
    template = PromptTemplate(template=prompt_template, input_variables=["intents", "INPUT", "chatHistory"])
    chain = LLMChain(prompt=template, llm=chat_model, verbose=False)

    # One "<intent>: <description>" line per candidate, each newline-terminated.
    inp_intents = ''.join(
        candidate[0] + ": " + chatbot_intents[candidate[0]] + "\n"
        for candidate in answer
    )

    chain_inputs = {"intents": inp_intents, "INPUT": input_text, "chatHistory": history}
    predicted_intent = chain.run(chain_inputs)
    formatted = chain.prompt.format_prompt(intents=inp_intents, INPUT=input_text, chatHistory=history)
    prompt_result = formatted.to_string()
    return predicted_intent, prompt_result
# Colour palette for the demo UI.
theme = gr.themes.Default(
    primary_hue="indigo",
    secondary_hue="pink",
    neutral_hue="slate",
)

# NOTE(review): the container nesting below (three sibling columns inside the
# row, tabs inside the first column) is an assumption — confirm against the
# intended layout. Event wiring does not depend on the nesting.
with gr.Blocks(title="Intent Classification Demo", theme=theme) as interface:
    gr.Markdown("""# Demo for Intent Classification""")
    with gr.Row(equal_height=True):
        # Column 1: model settings and the two input modes.
        with gr.Column():
            model_name = gr.Dropdown(["gpt-3.5-turbo",
                                      "gpt-3.5-turbo-1106",
                                      "gpt-4",
                                      "gpt-4-1106-preview"],
                                     label="Model name",
                                     info="Select model name for GPT")
            api_key = gr.Textbox(label="OpenAI API Key", info="get it at https://platform.openai.com/account/api-keys",visible=True, lines=1, type="password")
            # The info texts state the defaults actually configured via `value`.
            n_samples = gr.Slider(1, 10, value=10, step=1, label="N samples", info="Number of samples to be retrieved. Default is 10")
            threshold = gr.Slider(0.0, 1.0, value=0.13, step=0.01, label="Threshold", info="Threshold of cosine similarity which intent will be considered similar to the input. The higher, the more similar the intent will be. Default is 0.13")

            # Input mode 1: free text typed by the user.
            with gr.Tab("Input from raw text"):
                raw_input_text = gr.Textbox(label="Input Chat", info="Input your chat here, the model will predict the intent")
                raw_state = gr.Dropdown(["GeneralState",
                                         "HomeworkState",
                                         "ExerciseState",
                                         "UnderstandState",
                                         "RecommendMaterialState",
                                         "PersonalState",
                                         "AssessKnowledgeState"],
                                        label="State",
                                        info="Select state on which the chat currently on. Some state will exclude some intents")
                raw_history = gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=True)
                raw_ask_button = gr.Button("Ask")
                # Hidden until a similarity result exists (raw_inference
                # returns a visible replacement).
                ask_llm_button_raw = gr.Button("Ask intent with Language Model", visible=False)

            # Input mode 2: rows taken from an uploaded CSV export.
            with gr.Tab("Input from Big Query data"):
                gr.Markdown("""
## Guide:
Assuming have access to BigQuery, you can query the table `silicon-airlock-153323.chatbot_ai_dwp.fact_chatbot_ai_conversation_raw`, export result as CSV file, and upload here (make sure your query contains these columns: `prompt, user_message, chatbot_response, state`)
```SELECT prompt, user_message, chatbot_response, state FROM `silicon-airlock-153323.chatbot_ai_dwp.fact_chatbot_ai_conversation_raw` WHERE DATE(_PARTITIONTIME) BETWEEN DATE("2023-11-13") AND DATE("2023-11-19") AND service_name = 'learning_companion' LIMIT 1000```
Adjust the date according to needs. After that, export as CSV and upload to this gradio
example CSV files to use:
https://drive.google.com/file/d/1iDLywKP5JxDJXaAzomSUYLZRWvoGqpt5/view?usp=sharing
https://drive.google.com/file/d/1Jh_hP7U2JGQXsRo9OponyVSHL_s1Yx8w/view?usp=sharing
""")
                file_output = gr.File()
                upload_button = gr.UploadButton("Upload CSV...", file_types=["file"], file_count="single")
                # These widgets start hidden; process_csv returns visible
                # replacements once a CSV has been loaded.
                index = gr.Slider(1, 1000, value=5, step=1, visible=False, label="Index", info="Select which index of data to check the intents")
                input_text = gr.Textbox(label="Input Chat", info="Input in index", visible=False)
                state = gr.Textbox(label="State", info="State on which the chat currently on. Some state will exclude some intents", visible=False)
                history = gr.Textbox(label="History or summary", info="Chat history or summary, if available", visible=False)
                gt = gr.Textbox(label="Ground Truth", info="The label in which the IntentClassification predict in the CSV", visible=False)
                ask_button = gr.Button("Ask With CSV")
                ask_llm_button = gr.Button("Ask intent with Language Model", visible=False)
                index.change(fn=update_index, inputs=index, outputs=[input_text, state, gt, history])
                upload_button.upload(process_csv, upload_button, [upload_button, file_output, index, input_text, state, gt, history])

        # Column 2: accuracy check over the uploaded CSV.
        with gr.Column():
            with gr.Row():
                accuracy = gr.Markdown("""
You can also check accuracy on how well the model predict the intents based on your provided CSV files. This might take 1-2 minutes.
""", visible=True)
                accuracy_button = gr.Button("Calculate Accuracy", visible=True)
            accuracy_score = gr.Label(label="Accuracy result", visible=True)
            accuracy_table = gr.Dataframe(visible=True)

        # Column 3: prediction results.
        with gr.Column():
            answer = gr.JSON(label="Sentence Similarity Prediction", show_label=True)
            LLM_prediction = gr.Label(label="LLM Prediction Result", visible=True)
            LLM_prompt = gr.Textbox(label="Prompt Used for Language Model", info="Showing prompt used in language model", visible=True)

    # Event wiring: similarity ranking first, then the optional LLM step that
    # consumes the ranking shown in `answer`.
    accuracy_button.click(fn=check_accuracy, inputs=[n_samples, threshold], outputs=[accuracy_score, accuracy_table])
    raw_ask_button.click(fn=raw_inference, inputs=[raw_input_text, raw_state, n_samples, threshold, api_key], outputs=[answer, ask_llm_button_raw])
    ask_button.click(fn=raw_inference, inputs=[input_text, state, n_samples, threshold, api_key], outputs=[answer, ask_llm_button])
    ask_llm_button.click(fn=classify_intent, inputs=[input_text, history, answer, model_name, api_key], outputs=[LLM_prediction, LLM_prompt])
    ask_llm_button_raw.click(fn=classify_intent, inputs=[raw_input_text, raw_history, answer, model_name, api_key], outputs=[LLM_prediction, LLM_prompt])
# Start the Gradio app. Launch variants used during development:
#   interface.launch(debug=True)
#   interface.launch(share=True, debug=True)
interface.launch(inline=True)