File size: 5,428 Bytes
2936a70
6aeedda
b16a132
 
 
2936a70
 
 
 
6aeedda
b16a132
 
 
 
6aeedda
b16a132
2936a70
9d33cfe
 
b16a132
2936a70
9d33cfe
2936a70
 
9d33cfe
2936a70
 
 
 
 
9d33cfe
2936a70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import random
import gradio as gr
import sys
import traceback
import pandas as pd
import gradio as gr
import json

import yaml

# from tqdm import tqdm
from scripts.UBAR_code.interaction import UBAR_interact
from scripts.user_model_code.interaction import multiwoz_interact
from scripts.UBAR_code.interaction.UBAR_interact import bcolors


# Initialise agents
# Checkpoint identifiers handed to the two model wrappers below.
UBAR_checkpoint_path = "epoch50_trloss0.59_gpt2"
user_model_checkpoint_path = "MultiWOZ-full_checkpoint_step340k"

# NOTE(review): the chained assignments bind both names to the SAME object, so
# the "Dialogue System" tab and the "Self-Play" tab share one system model and
# the "User Simulator" tab and the "Self-Play" tab share one user model.
# Presumably this avoids loading each checkpoint twice; confirm that sharing
# per-dialogue state across tabs is acceptable.
sys_model = self_play_sys_model = UBAR_interact.UbarSystemModel(
    "UBAR_sys_model", UBAR_checkpoint_path, "scripts/UBAR_code/interaction/config.yaml"
)
user_model = self_play_user_model = multiwoz_interact.NeuralAgent(
    "user", user_model_checkpoint_path, "scripts/user_model_code/interaction/config.yaml"
)


# Get goals
# n_goals is passed to the loader and also used as the upper bound when drawing
# a random goal index; assumes the loader returns at least n_goals entries
# indexable by 0..n_goals-1 - TODO confirm.
n_goals = 100
goals_path = "data/raw/UBAR/multi-woz/data.json"
print("Loading goals...")
goals = multiwoz_interact.read_multiWOZ_20_goals(goals_path, n_goals)

# Initialise the agent with a randomly chosen goal for the user simulator tab
# (the user can draw a new one with the "Generate Goal" button)
curr_goal_idx = random.randint(0, n_goals - 1)
current_goal = goals[curr_goal_idx]
user_model.init_session(ini_goal=current_goal)

# Do the same initialisation but for the self-play tab
# NOTE(review): self_play_user_model is the same object as user_model, so this
# second init_session call is made on the agent initialised just above and
# presumably replaces that goal - confirm against NeuralAgent.init_session.
curr_sp_goal_idx = random.randint(0, n_goals - 1)
current_sp_goal = goals[curr_sp_goal_idx]
self_play_user_model.init_session(ini_goal=current_sp_goal)

# Conversation histories, one list per tab. The handler functions append a
# 2-tuple per turn and return the whole list to the Chatbot component.
ds_history = []
us_history = []
self_play_history = []


def change_goal():
    """Draw a new random goal for the user simulator tab.

    Stores the drawn index in the module-level ``curr_goal_idx``, calls
    ``user_model.init_session`` with the selected goal, and returns that
    goal serialised as block-style YAML text.
    """
    global curr_goal_idx
    curr_goal_idx = random.randint(0, n_goals - 1)
    selected_goal = goals[curr_goal_idx]
    user_model.init_session(ini_goal=selected_goal)
    return yaml.dump(selected_goal, default_flow_style=False)


def change_sp_goal():
    """Draw a new random goal for the self-play tab.

    Stores the drawn index in the module-level ``curr_sp_goal_idx``, calls
    ``self_play_user_model.init_session`` with the selected goal, and
    returns that goal serialised as block-style YAML text.
    """
    global curr_sp_goal_idx
    curr_sp_goal_idx = random.randint(0, n_goals - 1)
    selected_goal = goals[curr_sp_goal_idx]
    self_play_user_model.init_session(ini_goal=selected_goal)
    return yaml.dump(selected_goal, default_flow_style=False)


def ds_chatbot(user_utt):
    """Handle one user message in the "Dialogue System" tab.

    The number of exchanges recorded so far is passed to the system model
    as the turn id. The ``(user_utt, system reply)`` pair is appended to
    ``ds_history`` and the whole history list is returned.
    """
    reply = sys_model.response(user_utt, len(ds_history))
    ds_history.append((user_utt, reply))
    return ds_history


def us_chatbot(sys_response):
    """Handle one system message in the "User Simulator" tab.

    Asks the user model for its reply to ``sys_response``, appends the
    ``(sys_response, simulated reply)`` pair to ``us_history``, and returns
    the whole history list. If the user model reports that it has
    terminated, a new goal is drawn via ``change_goal()`` before returning.
    """
    simulated_utt = user_model.response(sys_response)
    us_history.append((sys_response, simulated_utt))

    if not user_model.is_terminated():
        return us_history

    # The YAML text returned by change_goal() is discarded here; only the
    # history list is handed back to the caller.
    change_goal()
    return us_history


def self_play():
    """Advance the self-play dialogue by one user/system exchange.

    The user simulator replies to the most recent system response (or to an
    empty string on the very first step), the dialogue system answers that
    utterance, and the ``(user_utt, sys_response)`` pair is appended to
    ``self_play_history``.

    This function uses the names dedicated to the self-play tab
    (``self_play_user_model``, ``self_play_sys_model``, ``change_sp_goal``)
    so that a finished dialogue refreshes ``curr_sp_goal_idx`` rather than
    the user-simulator tab's ``curr_goal_idx``.

    Returns:
        The full ``self_play_history`` list, including the new exchange.
    """
    # An empty system utterance opens the dialogue; afterwards the simulator
    # responds to the system's latest reply.
    sys_response = self_play_history[-1][1] if self_play_history else ""

    user_utt = self_play_user_model.response(sys_response)

    # The number of exchanges recorded so far serves as the turn id.
    turn_id = len(self_play_history)
    sys_response = self_play_sys_model.response(user_utt, turn_id)

    self_play_history.append((user_utt, sys_response))

    # NOTE(review): the history is not cleared when a new goal is drawn, so
    # the next step continues from the previous dialogue's last system reply
    # and turn count - confirm whether that is intended.
    if self_play_user_model.is_terminated():
        change_sp_goal()

    return self_play_history


# Initialise demo render
# Three-tab Gradio Blocks UI: talk to the dialogue system, play the dialogue
# system against the user simulator, or watch both agents talk to each other.
block = gr.Blocks()

with block:
    gr.Markdown("# Demo User Simulator and Task-Oriented Dialogue System")
    gr.Markdown("*Created by Alistair McLeay, with help from Professor Bill Byrne, Andy Tseng, and Alex Coca*")
    with gr.Tabs():
        # Tab 1: the human plays the user, ds_chatbot produces system replies.
        with gr.TabItem("Dialogue System"):
            # NOTE(review): "Systen" in the user-facing text below is a typo for "System".
            gr.Markdown(
                "This bot is a Task-Oriented Dialogue Systen. You are the user. Go ahead and try to book a train, or a hotel etc."
            )
            with gr.Row():
                ds_input_text = gr.inputs.Textbox(
                    label="User Message", placeholder="I'd like to book a train from Cambridge to London"
                )
                ds_response = gr.outputs.Chatbot(label="Dialogue System Response")
            ds_button = gr.Button("Submit Message")

        # Tab 2: the human plays the system, us_chatbot produces user replies.
        with gr.TabItem("User Simulator"):
            gr.Markdown(
                "This bot is a User Simulator. You are the Task-Oriented Dialogue System. Your job is to help the user with their requests."
            )
            new_goal_button = gr.Button("Generate Goal")
            with gr.Row():
                us_input_text = gr.inputs.Textbox(
                    label="Dialogue System Message", placeholder="How can I help you today?"
                )
                us_response = gr.outputs.Chatbot(label="User Simulator Response")
                current_goal_yaml = gr.outputs.Textbox(label="Current Goal (YAML)")
            us_button = gr.Button("Submit Message")

        # Tab 3: no text input; each button press runs one self_play() step.
        with gr.TabItem("Self-Play"):
            gr.Markdown(
                "In this scenario you define a goal and you then watch both agents interact where the User Simulator is trying to achieve the goal, and the Task-Oriented Dialogue System is trying to help the User Simulator do so."
            )
            new_sp_goal_button = gr.Button("Generate Goal")
            with gr.Row():
                self_play_response = gr.outputs.Chatbot(label="Self-Play Output")
                current_sp_goal_yaml = gr.outputs.Textbox(label="Current Goal (YAML)")
            self_play_button = gr.Button("Run Next Step")

    # Wire each button to its handler as click(fn, inputs, outputs); handlers
    # that take no argument are registered with inputs=None.
    # NOTE(review): the goal textboxes are only written by the two
    # "Generate Goal" buttons, so the goals chosen at start-up (and any goal
    # drawn automatically when a dialogue terminates) are not shown.
    ds_button.click(ds_chatbot, ds_input_text, ds_response)
    us_button.click(us_chatbot, us_input_text, us_response)
    self_play_button.click(self_play, None, self_play_response)
    new_goal_button.click(change_goal, None, current_goal_yaml)
    new_sp_goal_button.click(change_sp_goal, None, current_sp_goal_yaml)

# Runs at import time; share=True asks Gradio to also create a public share link.
block.launch(share=True)