from gym_minigrid.minigrid import COLOR_TO_IDX, OBJECT_TO_IDX


def generate_text_obs(obs, info):
    """Build an LLM prompt from the textual observation and the utterance history."""
    text_observation = obs_to_text(info)

    llm_prompt = "Obs : "
    llm_prompt += "".join(text_observation)

    # add utterances, if any were made
    if obs["utterance_history"] != "Conversation: \n":
        utt_hist = obs["utterance_history"]
        utt_hist = utt_hist.replace("Conversation: \n", "")
        llm_prompt += utt_hist

    return llm_prompt
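
# Illustrative sketch of a generated prompt (the object layout and the "Peer"
# speaker name are assumptions for illustration, whitespace simplified):
#
#   Obs : 3 steps in front of you there is a closed red door
#   Peer: go to the door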

def obs_to_text(info):
    """Convert the agent's egocentric view into a list of textual descriptions."""
    image, vis_mask = info["image"], info["vis_mask"]
    carrying = info["carrying"]
    agent_pos_vx, agent_pos_vy = info["agent_pos_vx"], info["agent_pos_vy"]
    npc_actions_dict = info["npc_actions_dict"]

    # Each cell encodes (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state, ...)
    # State: 0 = open, 1 = closed, 2 = locked
    IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
    IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))
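    # Minimal sanity sketch of what these inverse maps do (no concrete indices
    # are assumed here, only the round trip through gym_minigrid's tables):
    #   IDX_TO_OBJECT[OBJECT_TO_IDX["door"]] == "door"
    #   IDX_TO_COLOR[COLOR_TO_IDX["red"]] == "red"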

    list_textual_descriptions = []
    if carrying is not None:
        list_textual_descriptions.append("You carry a {} {}".format(carrying.color, carrying.type))

    # agent_pos_vx, agent_pos_vy = self.get_view_coords(self.agent_pos[0], self.agent_pos[1])

    # Collect every visible cell that is not unseen (0), empty (1), or wall (2)
    view_field_dictionary = dict()
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            if image[i][j][0] != 0 and image[i][j][0] != 1 and image[i][j][0] != 2:
                if i not in view_field_dictionary.keys():
                    view_field_dictionary[i] = dict()
                    view_field_dictionary[i][j] = image[i][j]
                else:
                    view_field_dictionary[i][j] = image[i][j]

    # Find the walls, if any.
    # A wall is described only if there are no objects between the agent and
    # the wall in a straight line.
    add_wall_descr = False
    if add_wall_descr:
        # Find wall in front
        j = agent_pos_vy - 1
        object_seen = False
        while j >= 0 and not object_seen:
            if image[agent_pos_vx][j][0] != 0 and image[agent_pos_vx][j][0] != 1:
                if image[agent_pos_vx][j][0] == 2:
                    list_textual_descriptions.append(
                        f"A wall is {agent_pos_vy - j} steps in front of you. \n")  # forward
                object_seen = True
            j -= 1

        # Find wall left
        i = agent_pos_vx - 1
        object_seen = False
        while i >= 0 and not object_seen:
            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
                if image[i][agent_pos_vy][0] == 2:
                    list_textual_descriptions.append(
                        f"A wall is {agent_pos_vx - i} steps to the left. \n")  # left
                object_seen = True
            i -= 1

        # Find wall right
        i = agent_pos_vx + 1
        object_seen = False
        while i < image.shape[0] and not object_seen:
            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
                if image[i][agent_pos_vy][0] == 2:
                    list_textual_descriptions.append(
                        f"A wall is {i - agent_pos_vx} steps to the right. \n")  # right
                object_seen = True
            i += 1

    # list_textual_descriptions.append("You see the following objects: ")
    # Describe the position of each seen object relative to the agent
    for i in view_field_dictionary.keys():
        for j in view_field_dictionary[i].keys():
            if i != agent_pos_vx or j != agent_pos_vy:
                object = view_field_dictionary[i][j]

                # # don't show npc
                # if IDX_TO_OBJECT[object[0]] == "npc":
                #     continue

                front_dist = agent_pos_vy - j
                left_right_dist = i - agent_pos_vx

                loc_descr = ""
                if front_dist == 1 and left_right_dist == 0:
                    loc_descr += "Right in front of you "
                elif left_right_dist == 1 and front_dist == 0:
                    loc_descr += "Just to the right of you"
                elif left_right_dist == -1 and front_dist == 0:
                    loc_descr += "Just to the left of you"
                else:
                    front_str = str(front_dist) + " steps in front of you " if front_dist > 0 else ""
                    loc_descr += front_str

                    suff = "s" if abs(left_right_dist) > 1 else ""
                    and_ = "and" if loc_descr != "" else ""

                    if left_right_dist < 0:
                        loc_descr += f"{and_} {-left_right_dist} step{suff} to the left"
                    elif left_right_dist > 0:
                        loc_descr += f"{and_} {left_right_dist} step{suff} to the right"

                loc_descr += " there is a "

                obj_type = IDX_TO_OBJECT[object[0]]
                if obj_type == "npc":
                    IDX_TO_STATE = {0: "friendly", 1: "antagonistic"}

                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} peer. "

                    # gaze direction
                    gaze_dir = {
                        0: "towards you",
                        1: "to the left of you",
                        2: "in the same direction as you",
                        3: "to the right of you",
                    }
                    description += f"It is looking {gaze_dir[object[3]]}. "

                    # pointing direction (255 encodes "not pointing")
                    point_dir = {
                        0: "towards you",
                        1: "to the left of you",
                        2: "in the same direction as you",
                        3: "to the right of you",
                    }
                    if object[4] != 255:
                        description += f"It is pointing {point_dir[object[4]]}. "

                    # last action: map the internal action name to readable text
                    last_action = {v: k for k, v in npc_actions_dict.items()}[object[5]]
                    last_action = {
                        "go_forward": "go forward",
                        "rotate_left": "turn left",
                        "rotate_right": "turn right",
                        "toggle_action": "toggle",
                        "point_stop_point": "stop pointing",
                        "point_E": "",
                        "point_S": "",
                        "point_W": "",
                        "point_N": "",
                        "stop_point": "stop pointing",
                        "no_op": "",
                    }[last_action]

                    if last_action not in ["no_op", ""]:
                        description += f"Its last action was to {last_action}. "

                elif obj_type in ["switch", "apple", "generatorplatform", "marble", "marbletee", "fence"]:
                    # todo: this assumes that Switch.no_light == True
                    description = f"{IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[2:].mean() == 0

                elif obj_type == "lockablebox":
                    IDX_TO_STATE = {0: "open", 1: "closed", 2: "locked"}
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[3:].mean() == 0

                elif obj_type == "applegenerator":
                    IDX_TO_STATE = {1: "square", 2: "round"}
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[3:].mean() == 0

                elif obj_type == "remotedoor":
                    IDX_TO_STATE = {0: "open", 1: "closed"}
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[3:].mean() == 0

                elif obj_type == "door":
                    IDX_TO_STATE = {0: "open", 1: "closed", 2: "locked"}
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[3:].mean() == 0

                elif obj_type == "lever":
                    IDX_TO_STATE = {1: "activated", 0: "unactivated"}
                    if object[3] == 255:
                        countdown_txt = ""
                    else:
                        countdown_txt = f"with {object[3]} timesteps left. "
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} {countdown_txt}"
                    assert object[4:].mean() == 0

                else:
                    raise ValueError(f"Undefined object type {obj_type}")

                full_descr = loc_descr + description + "\n"
                list_textual_descriptions.append(full_descr)

    if len(list_textual_descriptions) == 0:
        list_textual_descriptions.append("\n")

    return list_textual_descriptions
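

# ---------------------------------------------------------------------------
# Minimal usage sketch. `obs` and `info` normally come from an environment
# step; the dummy values below are assumptions that only mirror the keys this
# module reads (an all-zero image means every cell is "unseen"), not a real
# observation.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np

    dummy_info = {
        "image": np.zeros((7, 7, 6), dtype=np.uint8),  # assumed view size and channel count
        "vis_mask": np.ones((7, 7), dtype=bool),
        "carrying": None,
        "agent_pos_vx": 3,
        "agent_pos_vy": 6,
        "npc_actions_dict": {},
    }
    dummy_obs = {"utterance_history": "Conversation: \n"}  # empty conversation

    # With nothing visible and no utterances, the prompt is just "Obs : \n"
    print(repr(generate_text_obs(dummy_obs, dummy_info)))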