Spaces:
Running
Running
from src.utils import get_valid_name_city,extract_before_parenthesis,extract_numbers_from_filenames | |
from tools.flights.apis import Flights | |
from tools.accommodations.apis import Accommodations | |
from tools.restaurants.apis import Restaurants | |
from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix | |
from tools.attractions.apis import Attractions | |
import math | |
import json | |
import re | |
import numpy as np | |
import os | |
import sys | |
from tqdm import tqdm | |
import argparse | |
# Make the parent of the current working directory importable. This has to
# run before the chdir below because it reads the directory the process was
# started in.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), os.pardir)))

# Switch the working directory to the folder that contains this file.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Module-level data sources shared by every check below. They are created
# after the chdir above; keep that ordering.
flight = Flights()
accommodation = Accommodations()
restaurants = Restaurants()
googleDistanceMatrix = GoogleDistanceMatrix()
attractions = Attractions()
def load_line_json_data(filename):
    """Load a JSON Lines file into a list of decoded objects.

    Each non-blank line of the file is parsed as one JSON document. Blank or
    whitespace-only lines are skipped, so an empty file yields an empty list
    instead of raising on ``json.loads('')``.

    Args:
        filename: Path of the UTF-8 encoded file to read.

    Returns:
        list: The decoded objects, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        # Iterating the handle streams the file line by line instead of
        # materialising the whole content first.
        return [json.loads(line) for line in f if line.strip()]
def convert_bool_values(item):
    """Return a copy of ``item`` with every numpy boolean turned into ``bool``.

    Dictionaries (values only), lists and tuples are walked recursively and
    rebuilt as plain ``dict`` / ``list`` / ``tuple`` objects. Anything else is
    handed back untouched.
    """
    # Leaf case first: a numpy bool_ becomes a built-in bool.
    if isinstance(item, np.bool_):
        return bool(item)

    if isinstance(item, dict):
        converted = {}
        for key, value in item.items():
            converted[key] = convert_bool_values(value)
        return converted

    if isinstance(item, (list, tuple)):
        members = [convert_bool_values(member) for member in item]
        # Tuples are repackaged as tuples, lists stay lists.
        return tuple(members) if isinstance(item, tuple) else members

    # Every other type passes through unchanged.
    return item
def extract_from_to(text: str):
    """
    Pull the origin and destination out of a "from A to B" phrase.

    The destination runs up to the next comma or to the end of the string.

    Args:
    - text (str): The string to search.

    Returns:
    - tuple: ``(A, B)`` for the first match, or ``(None, None)`` when the
      phrase does not occur.
    """
    found = re.search(r"from\s+(.+?)\s+to\s+([^,]+)(?=[,\s]|$)", text)
    if found is None:
        return (None, None)
    return found.groups()
def _plan_transportation_cost(question, unit):
    """Return the transportation cost of one plan day, or 0 if it cannot be priced."""
    value = unit['transportation']
    if not value or value == '-':
        return 0

    # The route is normally part of the transportation text; fall back to the
    # day's 'current_city' field when it is not.
    org_city, dest_city = extract_from_to(value)
    if org_city is None or dest_city is None:
        org_city, dest_city = extract_from_to(unit['current_city'])
    if org_city is None or dest_city is None:
        # No route could be parsed: nothing is charged for this day.
        return 0

    mode = value.lower()
    if 'flight number' in mode:
        # Case-insensitive extraction keeps this in step with the
        # case-insensitive guard above; a plain split on 'Flight Number: '
        # raised IndexError for any other casing.
        match = re.search(r'flight number: ([^,]*)', value, flags=re.IGNORECASE)
        if match is None:
            return 0
        res = flight.data[flight.data['Flight Number'] == match.group(1)]
        if len(res) > 0:
            # Flights are charged per person.
            return res['Price'].values[0] * question['people_number']
        return 0

    if 'self-driving' in mode:
        cost = googleDistanceMatrix.run_for_evaluation(org_city, dest_city, 'self-driving')['cost']
        # One car per group of up to five people.
        return cost * math.ceil(question['people_number'] * 1.0 / 5)

    if 'taxi' in mode:
        cost = googleDistanceMatrix.run_for_evaluation(org_city, dest_city, 'taxi')['cost']
        # One taxi per group of up to four people.
        return cost * math.ceil(question['people_number'] * 1.0 / 4)

    return 0


def _plan_meal_cost(question, entry):
    """Return the cost of one meal entry for the whole group, or 0 if unknown."""
    if not entry or entry == '-':
        return 0
    name, city = get_valid_name_city(entry)
    table = restaurants.data
    # Regex-escaped substring match on the name, exact match on the city;
    # the first matching row is the one that gets priced.
    res = table[(table['Name'].astype(str).str.contains(re.escape(name))) & (table['City'] == city)]
    if len(res) > 0:
        return res['Average Cost'].values[0] * question['people_number']
    return 0


def _plan_accommodation_cost(question, entry):
    """Return the cost of one night's accommodation entry, or 0 if unknown."""
    if not entry or entry == '-':
        return 0
    name, city = get_valid_name_city(entry)
    table = accommodation.data
    res = table[(table['NAME'].astype(str).str.contains(re.escape(name))) & (table['city'] == city)]
    if len(res) > 0:
        # Enough units are booked to hold everybody given the listing's
        # 'maximum occupancy'.
        units_needed = math.ceil(question['people_number'] * 1.0 / res['maximum occupancy'].values[0])
        return res['price'].values[0] * units_needed
    return 0


def get_total_cost(question, tested_data):
    """Sum the priced items of a plan over the days covered by the query.

    Only the first ``min(question['days'], len(tested_data))`` entries of
    ``tested_data`` are considered. For every day the transportation, the
    three meals and the accommodation are priced; entries that are empty,
    equal to ``'-'`` or not found in the corresponding data table add nothing.

    Args:
        question: Query dict; reads ``'days'`` and ``'people_number'``.
        tested_data: Sequence of per-day plan dicts with the keys
            ``'transportation'``, ``'breakfast'``, ``'lunch'``, ``'dinner'``
            and ``'accommodation'`` (``'current_city'`` is read only when the
            transportation text contains no "from A to B" route).

    Returns:
        The accumulated cost (0 when nothing could be priced).
    """
    total_cost = 0
    for i in range(min(question['days'], len(tested_data))):
        unit = tested_data[i]
        total_cost += _plan_transportation_cost(question, unit)
        for meal in ('breakfast', 'lunch', 'dinner'):
            total_cost += _plan_meal_cost(question, unit[meal])
        total_cost += _plan_accommodation_cost(question, unit['accommodation'])
    return total_cost
def is_valid_room_rule(question, tested_data):
    """Check the planned accommodations against the requested house rule.

    The query asks for something to be allowed (for example ``'smoking'``).
    A planned accommodation violates it when its ``house_rules`` text
    contains the matching prohibition (``'No smoking'``).

    Args:
        question: Query dict; reads ``question['local_constraint']['house rule']``
            and ``question['days']``.
        tested_data: Sequence of per-day plan dicts; reads ``'accommodation'``.

    Returns:
        tuple: ``(None, None)`` when no house rule is requested,
        ``(False, message)`` for the first violating accommodation,
        ``(True, None)`` otherwise.
    """
    rule = question['local_constraint']['house rule']
    if rule is None:
        return None, None

    # Requested permission -> phrase in the listing that forbids it.
    # 'parities' is the misspelling the check historically used for
    # 'parties'; it is kept as an alias so both spellings are enforced.
    prohibitions = {
        'smoking': 'No smoking',
        'parties': 'No parties',
        'parities': 'No parties',
        'children under 10': 'No children under 10',
        'visitors': 'No visitors',
        'pets': 'No pets',
    }
    forbidden_phrase = prohibitions.get(rule)
    if forbidden_phrase is None:
        # Unknown rule names have nothing to be checked against.
        return True, None

    for i in range(min(question['days'], len(tested_data))):
        unit = tested_data[i]
        if not unit['accommodation'] or unit['accommodation'] == '-':
            continue
        name, city = get_valid_name_city(unit['accommodation'])
        table = accommodation.data
        # Regex-escaped substring match on the name, exact match on the city;
        # only the first matching row is inspected.
        res = table[(table['NAME'].astype(str).str.contains(re.escape(name))) & (table['city'] == city)]
        if len(res) > 0 and forbidden_phrase in str(res['house_rules'].values[0]):
            return False, f"The house rule should be {rule}."
    return True, None
def is_valid_cuisine(question, tested_data):
    """Check that every requested cuisine is served by some planned meal.

    Meals whose city equals ``question['org']`` are ignored. Only that single
    meal is skipped; the remaining meals of the same day are still examined.

    Args:
        question: Query dict; reads ``question['local_constraint']['cuisine']``,
            ``question['days']`` and ``question['org']``.
        tested_data: Sequence of per-day plan dicts; reads ``'breakfast'``,
            ``'lunch'`` and ``'dinner'``.

    Returns:
        tuple: ``(None, None)`` when no cuisine is requested,
        ``(True, None)`` when every requested cuisine was found,
        ``(False, message)`` naming the first requested cuisine that was not.
    """
    wanted = question['local_constraint']['cuisine']
    if not wanted:
        return None, None

    found = set()
    for i in range(min(question['days'], len(tested_data))):
        unit = tested_data[i]
        for meal in ('breakfast', 'lunch', 'dinner'):
            entry = unit[meal]
            if not entry or entry == '-':
                continue
            name, city = get_valid_name_city(entry)
            if city == question['org']:
                # Skip this meal only, not the rest of the day.
                continue
            table = restaurants.data
            # Regex-escaped substring match on the name, exact match on the
            # city; only the first matching row is inspected.
            res = table[(table['Name'].astype(str).str.contains(re.escape(name))) & (table['City'] == city)]
            if len(res) > 0:
                served = res.iloc[0]['Cuisines']
                found.update(cuisine for cuisine in wanted if cuisine in served)

    # Report the first unsatisfied cuisine. Checking membership rather than
    # comparing sizes also gives a proper result when the requested list
    # contains duplicates.
    for cuisine in wanted:
        if cuisine not in found:
            return False, f"The cuisine {cuisine} is not satisfied."
    return True, None
def is_valid_transportation(question, tested_data):
    """Check the planned transportation against the query's exclusion.

    Supported constraints are ``'no flight'`` and ``'no self-driving'``. The
    banned mode is matched case-insensitively, the same way the cost
    computation in this module recognises transportation modes.

    Args:
        question: Query dict; reads
            ``question['local_constraint']['transportation']`` and
            ``question['days']``.
        tested_data: Sequence of per-day plan dicts; reads ``'transportation'``.

    Returns:
        tuple: ``(None, None)`` when no constraint is set,
        ``(False, message)`` for the first day using the banned mode,
        ``(True, None)`` otherwise.
    """
    constraint = question['local_constraint']['transportation']
    if constraint is None:
        return None, None

    if constraint == 'no flight':
        banned = 'flight'
    elif constraint == 'no self-driving':
        banned = 'self-driving'
    else:
        # Any other constraint value is not checked.
        banned = None

    for i in range(min(question['days'], len(tested_data))):
        value = tested_data[i]['transportation']
        if not value or value == '-':
            continue
        if banned is not None and banned in str(value).lower():
            # NOTE: the wording ("should not be no flight") is kept exactly
            # as before, in case consumers compare the message text.
            return False, f"The transportation should not be {constraint}."
    return True, None
def is_valid_room_type(question, tested_data):
    """Check the planned accommodations against the requested room type.

    Recognised constraint values are "shared room", "not shared room",
    "private room" and "entire room"; they are compared with the listing's
    'room type' value ('Shared room', 'Private room', 'Entire home/apt').

    Returns ``(None, None)`` when no room type is requested,
    ``(False, message)`` for the first accommodation that does not comply and
    ``(True, None)`` otherwise.
    """
    wanted = question['local_constraint']['room type']
    if wanted is None:
        return None, None

    day_count = min(question['days'], len(tested_data))
    for day_index in range(day_count):
        stay = tested_data[day_index]['accommodation']
        if not stay or stay == '-':
            continue

        name, city = get_valid_name_city(stay)
        # Regex-escaped substring match on the name, exact match on the city.
        name_hit = accommodation.data['NAME'].astype(str).str.contains(re.escape(name))
        city_hit = accommodation.data['city'] == city
        res = accommodation.data[name_hit & city_hit]
        if len(res) == 0:
            continue

        # Only the first matching listing is inspected.
        if wanted == 'not shared room':
            violated = res['room type'].values[0] == 'Shared room'
        elif wanted == 'shared room':
            violated = res['room type'].values[0] != 'Shared room'
        elif wanted == 'private room':
            violated = res['room type'].values[0] != 'Private room'
        elif wanted == 'entire room':
            violated = res['room type'].values[0] != 'Entire home/apt'
        else:
            violated = False

        if violated:
            return False, f"The room type should be {wanted}."
    return True, None
def evaluation(query_data, tested_data):
    """Run every constraint check on a plan and collect the outcomes.

    Returns a dict that maps each check name to a ``(verdict, message)``
    pair. 'valid_cost' is true when the plan's total cost does not exceed
    ``query_data['budget']``.
    """
    checks = (
        ('valid_cuisine', is_valid_cuisine),
        ('valid_room_rule', is_valid_room_rule),
        ('valid_transportation', is_valid_transportation),
        ('valid_room_type', is_valid_room_type),
    )
    return_info = {label: check(query_data, tested_data) for label, check in checks}

    spent = get_total_cost(query_data, tested_data)
    return_info['valid_cost'] = (bool(spent <= query_data['budget']), None)
    return return_info
def boolean_evaluation(query_data, tested_data):
    """Tell whether a plan passes all constraint checks.

    Runs the same checks as ``evaluation``. The name of the first check whose
    verdict is False is printed and False is returned; verdicts of None
    (constraint not requested) do not count as failures.
    """
    outcomes = evaluation(query_data, tested_data)
    for label, outcome in outcomes.items():
        if outcome[0] is False:
            print(label)
            return False
    return True