from gym_minigrid.minigrid import COLOR_TO_IDX, OBJECT_TO_IDX


def generate_text_obs(obs, info):
    """Build an LLM prompt from the textual observation and the utterance history."""
    text_observation = obs_to_text(info)

    llm_prompt = "Obs : "
    llm_prompt += "".join(text_observation)

    # add utterances, if any were made
    if obs["utterance_history"] != "Conversation: \n":
        utt_hist = obs["utterance_history"]
        utt_hist = utt_hist.replace("Conversation: \n", "")
        llm_prompt += utt_hist

    return llm_prompt
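
# Illustrative sketch of a generated prompt (the object layout and the "Peer"
# speaker name are assumptions for illustration, whitespace simplified):
#
#   Obs : 3 steps in front of you there is a closed red door
#   Peer: go to the door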

def obs_to_text(info):
    """Convert the agent's egocentric view into a list of textual descriptions."""
    image, vis_mask = info["image"], info["vis_mask"]
    carrying = info["carrying"]
    agent_pos_vx, agent_pos_vy = info["agent_pos_vx"], info["agent_pos_vy"]
    npc_actions_dict = info["npc_actions_dict"]

    # Each cell encodes (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state, ...)
    # State: 0 = open, 1 = closed, 2 = locked
    IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
    IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))
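    # Minimal sanity sketch of what these inverse maps do (no concrete indices
    # are assumed here, only the round trip through gym_minigrid's tables):
    #   IDX_TO_OBJECT[OBJECT_TO_IDX["door"]] == "door"
    #   IDX_TO_COLOR[COLOR_TO_IDX["red"]] == "red"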

    list_textual_descriptions = []
    if carrying is not None:
        list_textual_descriptions.append("You carry a {} {}".format(carrying.color, carrying.type))

    # agent_pos_vx, agent_pos_vy = self.get_view_coords(self.agent_pos[0], self.agent_pos[1])

    # Collect every visible cell that is not unseen (0), empty (1), or wall (2)
    view_field_dictionary = dict()
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            if image[i][j][0] != 0 and image[i][j][0] != 1 and image[i][j][0] != 2:
                if i not in view_field_dictionary.keys():
                    view_field_dictionary[i] = dict()
                    view_field_dictionary[i][j] = image[i][j]
                else:
                    view_field_dictionary[i][j] = image[i][j]

    # Find the walls, if any.
    # A wall is described only if there are no objects between the agent and
    # the wall in a straight line.
    add_wall_descr = False
    if add_wall_descr:
        # Find wall in front
        j = agent_pos_vy - 1
        object_seen = False
        while j >= 0 and not object_seen:
            if image[agent_pos_vx][j][0] != 0 and image[agent_pos_vx][j][0] != 1:
                if image[agent_pos_vx][j][0] == 2:
                    list_textual_descriptions.append(
                        f"A wall is {agent_pos_vy - j} steps in front of you. \n")  # forward
                object_seen = True
            j -= 1

        # Find wall left
        i = agent_pos_vx - 1
        object_seen = False
        while i >= 0 and not object_seen:
            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
                if image[i][agent_pos_vy][0] == 2:
                    list_textual_descriptions.append(
                        f"A wall is {agent_pos_vx - i} steps to the left. \n")  # left
                object_seen = True
            i -= 1

        # Find wall right
        i = agent_pos_vx + 1
        object_seen = False
        while i < image.shape[0] and not object_seen:
            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
                if image[i][agent_pos_vy][0] == 2:
                    list_textual_descriptions.append(
                        f"A wall is {i - agent_pos_vx} steps to the right. \n")  # right
                object_seen = True
            i += 1

    # list_textual_descriptions.append("You see the following objects: ")
    # Describe the position of each seen object relative to the agent
    for i in view_field_dictionary.keys():
        for j in view_field_dictionary[i].keys():
            if i != agent_pos_vx or j != agent_pos_vy:
                object = view_field_dictionary[i][j]

                # # don't show npc
                # if IDX_TO_OBJECT[object[0]] == "npc":
                #     continue

                front_dist = agent_pos_vy - j
                left_right_dist = i - agent_pos_vx

                loc_descr = ""
                if front_dist == 1 and left_right_dist == 0:
                    loc_descr += "Right in front of you "
                elif left_right_dist == 1 and front_dist == 0:
                    loc_descr += "Just to the right of you"
                elif left_right_dist == -1 and front_dist == 0:
                    loc_descr += "Just to the left of you"
                else:
                    front_str = str(front_dist) + " steps in front of you " if front_dist > 0 else ""
                    loc_descr += front_str

                    suff = "s" if abs(left_right_dist) > 1 else ""
                    and_ = "and" if loc_descr != "" else ""

                    if left_right_dist < 0:
                        loc_descr += f"{and_} {-left_right_dist} step{suff} to the left"
                    elif left_right_dist > 0:
                        loc_descr += f"{and_} {left_right_dist} step{suff} to the right"

                loc_descr += " there is a "

                obj_type = IDX_TO_OBJECT[object[0]]
                if obj_type == "npc":
                    IDX_TO_STATE = {0: "friendly", 1: "antagonistic"}

                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} peer. "

                    # gaze direction
                    gaze_dir = {
                        0: "towards you",
                        1: "to the left of you",
                        2: "in the same direction as you",
                        3: "to the right of you",
                    }
                    description += f"It is looking {gaze_dir[object[3]]}. "

                    # pointing direction (255 encodes "not pointing")
                    point_dir = {
                        0: "towards you",
                        1: "to the left of you",
                        2: "in the same direction as you",
                        3: "to the right of you",
                    }
                    if object[4] != 255:
                        description += f"It is pointing {point_dir[object[4]]}. "

                    # last action: map the internal action name to readable text
                    last_action = {v: k for k, v in npc_actions_dict.items()}[object[5]]
                    last_action = {
                        "go_forward": "go forward",
                        "rotate_left": "turn left",
                        "rotate_right": "turn right",
                        "toggle_action": "toggle",
                        "point_stop_point": "stop pointing",
                        "point_E": "",
                        "point_S": "",
                        "point_W": "",
                        "point_N": "",
                        "stop_point": "stop pointing",
                        "no_op": "",
                    }[last_action]

                    if last_action not in ["no_op", ""]:
                        description += f"Its last action was to {last_action}. "

                elif obj_type in ["switch", "apple", "generatorplatform", "marble", "marbletee", "fence"]:
                    # todo: this assumes that Switch.no_light == True
                    description = f"{IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[2:].mean() == 0

                elif obj_type == "lockablebox":
                    IDX_TO_STATE = {0: "open", 1: "closed", 2: "locked"}
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[3:].mean() == 0

                elif obj_type == "applegenerator":
                    IDX_TO_STATE = {1: "square", 2: "round"}
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[3:].mean() == 0

                elif obj_type == "remotedoor":
                    IDX_TO_STATE = {0: "open", 1: "closed"}
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[3:].mean() == 0

                elif obj_type == "door":
                    IDX_TO_STATE = {0: "open", 1: "closed", 2: "locked"}
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
                    assert object[3:].mean() == 0

                elif obj_type == "lever":
                    IDX_TO_STATE = {1: "activated", 0: "unactivated"}
                    if object[3] == 255:
                        countdown_txt = ""
                    else:
                        countdown_txt = f"with {object[3]} timesteps left. "
                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} {countdown_txt}"
                    assert object[4:].mean() == 0

                else:
                    raise ValueError(f"Undefined object type {obj_type}")

                full_descr = loc_descr + description + "\n"
                list_textual_descriptions.append(full_descr)

    if len(list_textual_descriptions) == 0:
        list_textual_descriptions.append("\n")

    return list_textual_descriptions
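

# ---------------------------------------------------------------------------
# Minimal usage sketch. `obs` and `info` normally come from an environment
# step; the dummy values below are assumptions that only mirror the keys this
# module reads (an all-zero image means every cell is "unseen"), not a real
# observation.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import numpy as np

    dummy_info = {
        "image": np.zeros((7, 7, 6), dtype=np.uint8),  # assumed view size and channel count
        "vis_mask": np.ones((7, 7), dtype=bool),
        "carrying": None,
        "agent_pos_vx": 3,
        "agent_pos_vy": 6,
        "npc_actions_dict": {},
    }
    dummy_obs = {"utterance_history": "Conversation: \n"}  # empty conversation

    # With nothing visible and no utterances, the prompt is just "Obs : \n"
    print(repr(generate_text_obs(dummy_obs, dummy_info)))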