# File size: 10,436 Bytes
# 9be4956
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
import sys
import os
# Add the parent of the current working directory to the import path,
# presumably so the `utils` and `tools` packages imported below resolve.
# NOTE(review): os.getcwd() is read *before* the chdir on the next line, so
# the appended path depends on the directory the script is launched from —
# confirm this is intended.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))
# Switch to this file's directory; the relative '../database/...' and
# '../data/...' paths used later in this file are resolved against it.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
import random
from utils.budget_estimation import budget_calc
import json
from datetime import datetime, timedelta
from tools.googleDistanceMatrix.apis import GoogleDistanceMatrix
import numpy as np

# Instantiated once at import time; the only uses visible in this file are
# commented out, but the name is kept for anything importing this module.
google_distance = GoogleDistanceMatrix()

# Each line of the table is split on tabs below: field 0 is used as the city
# name and field 1 as its state. The context manager closes the handle
# instead of leaving it open until garbage collection.
with open('../database/background/citySet_with_states.txt') as city_file:
    city_set = city_file.read().strip().split('\n')

# state name -> list of city names in that state, in file order.
state_city_map = {}

for city in city_set:
    fields = city.split('\t')
    state = fields[1]
    state_city_map.setdefault(state, []).append(fields[0])

# Trip length in days -> value stored as "visiting_city_number" in each query.
visiting_city_map = {3:1,5:2,7:3}

def round_to_hundreds(num):
    """Round ``num`` to the nearest multiple of 100.

    The built-in ``round`` is used, so exact halves go to the even hundred
    (250 -> 200, 350 -> 400).
    """
    hundreds = round(num / 100)
    return hundreds * 100

def select_consecutive_dates(num_days, start_date=datetime(2022, 3, 1), end_date=datetime(2022, 4, 1)):
    """
    Pick a random run of ``num_days`` consecutive dates.

    The candidate days are ``start_date``, ``start_date`` + 1 day, ... up to
    but not including ``end_date``. The first day of the run is drawn
    uniformly among the positions that leave room for the whole run.
    ``random.randint`` raises ``ValueError`` when ``num_days`` exceeds the
    number of candidate days.
    """
    span_in_days = (end_date - start_date).days
    calendar = [start_date + timedelta(days=offset) for offset in range(span_in_days)]

    # Last index at which a run of num_days still fits inside the calendar.
    first = random.randint(0, len(calendar) - num_days)

    return calendar[first:first + num_days]


def _read_city_state_lines():
    """Return the lines of the city/state table (tab-separated city and state)."""
    # Context manager so the handle is closed rather than leaked on every call.
    with open('../database/background/citySet_with_states.txt') as table:
        return table.read().strip().split('\n')


def get_org_dest(days:int):
    """Randomly pick an origin city and a destination for a trip.

    Args:
        days: Trip length. Only 3, 5 and 7 are handled.

    Returns:
        A ``(origin, destination)`` tuple of strings.

        * ``days == 3``: the destination is a city located in a different
          state than the origin city.
        * ``days in (5, 7)``: the destination is a *state* name. The state
          differs from the origin's state, its table line does not contain
          ``"None"``, and it has more than 3 cities in ``state_city_map``.

    Note:
        For any other ``days`` value no branch assigns the result, so the
        final ``return`` fails with ``UnboundLocalError`` (unchanged from the
        previous behaviour; callers in this file only pass 3, 5 or 7).
    """
    if days == 3:
        entries = _read_city_state_lines()

        org = random.choice(entries)
        org_state = org.split('\t')[1]

        # Redraw until the destination city lies in another state.
        while True:
            dest = random.choice(entries)
            if dest.split('\t')[1] != org_state:
                break

        final_org = org.split('\t')[0]
        final_des = dest.split('\t')[0]

    elif days in [5,7]:
        entries = _read_city_state_lines()
        org = random.choice(entries)

        # Redraw until the candidate line names a usable destination state.
        while True:
            dest = random.choice(entries)
            if (
                dest != org
                and "None" not in dest
                and dest.split('\t')[1] != org.split('\t')[1]
                and len(state_city_map[dest.split('\t')[1]]) > 3
            ):
                break
        final_org = org.split('\t')[0]
        # Multi-city trips target a whole state, so field 1 (the state) is returned.
        final_des = dest.split('\t')[1]

    return final_org, final_des


def easy_level_element_selection(day_list):
    """Build the element dict for one easy-level query.

    A trip length is drawn from ``day_list``, then travel dates and an
    origin/destination pair are sampled and ``budget_calc`` is asked for an
    estimate. The budget is rounded to hundreds and is based on:

    * 3 days: midpoint of the "average" and "lowest" estimates,
    * 5 days: the "average" estimate,
    * 7 days: midpoint of the "average" and "highest" estimates.

    Easy queries are for a single traveller and carry no local constraint
    (every constraint slot is ``None``).
    """
    trip_days = random.choice(day_list)
    date_strings = [day.strftime('%Y-%m-%d') for day in select_consecutive_dates(trip_days)]
    origin, destination = get_org_dest(trip_days)
    estimate = budget_calc(origin, destination, date=date_strings, days=trip_days)

    # All constraint slots are present but left empty at this level.
    empty_constraints = dict.fromkeys(["house rule", "cuisine", "room type", 'transportation'])

    if trip_days == 3:
        rounded_budget = round_to_hundreds((estimate["average"] + estimate["lowest"]) / 2)
    elif trip_days == 5:
        rounded_budget = round_to_hundreds(estimate["average"])
    elif trip_days == 7:
        midpoint = round_to_hundreds((estimate["average"] + estimate["highest"]) / 2)
        rounded_budget = round_to_hundreds(midpoint)

    return {
        "org": origin,
        "dest": destination,
        "days": trip_days,
        "visiting_city_number": visiting_city_map[trip_days],
        "date": date_strings,
        "people_number": 1,
        "local_constraint": empty_constraints,
        "budget": rounded_budget,
        "query": None,
        "level": "easy",
    }



def middle_level_element_selection(day_list):
    """Build the element dict for one middle-level query.

    Compared with the easy level, the group size is random (2, or one of
    3-8, each group picked with equal probability) and exactly one local
    constraint among "house rule", "cuisine" and "room type" is filled in.
    The "transportation" slot is always present and always ``None``.

    The budget is the same per-trip-length base as the other levels,
    multiplied by the group size and by 0.75, then rounded to hundreds.
    """
    trip_days = random.choice(day_list)
    date_strings = [day.strftime('%Y-%m-%d') for day in select_consecutive_dates(trip_days)]

    group_sizes = random.choice([[2], [3, 4, 5, 6, 7, 8]])
    people_number = random.choice(group_sizes)

    candidate_types = ["house rule", "cuisine", "room type"]
    constraints = dict.fromkeys(candidate_types)
    constraints['transportation'] = None

    origin, destination = get_org_dest(trip_days)

    chosen_type = random.choice(candidate_types)

    if chosen_type == "house rule":
        constraints["house rule"] = random.choice(
            ["parties", "smoking", "children under 10", "visitors", "pets"]
        )
    elif chosen_type == "cuisine":
        constraints["cuisine"] = random.sample(
            ["Chinese", "American", "Italian", "Mexican", "Indian", "Mediterranean", "French"], 2
        )
    elif chosen_type == "room type":
        # Shared-room options are only offered to groups of at most two.
        if people_number <= 2:
            room_options = ["shared room", "not shared room", "private room", "entire room"]
        else:
            room_options = ["private room", "entire room"]
        constraints["room type"] = random.choice(room_options)
    elif chosen_type == "flight time":
        # Never selected with the current candidate list; kept for parity.
        constraints["flight time"] = random.choice(["morning", "afternoon", "evening"])

    estimate = budget_calc(origin, destination, days=trip_days, date=date_strings, people_number=people_number)

    if trip_days == 3:
        base = (estimate["average"] + estimate["lowest"]) / 2
    elif trip_days == 5:
        base = estimate["average"]
    elif trip_days == 7:
        base = round_to_hundreds((estimate["average"] + estimate["highest"]) / 2)
    rounded_budget = round_to_hundreds(base * people_number * 0.75)

    return {
        "org": origin,
        "dest": destination,
        "days": trip_days,
        "visiting_city_number": visiting_city_map[trip_days],
        "date": date_strings,
        "people_number": people_number,
        "local_constraint": constraints,
        "budget": rounded_budget,
        "query": None,
        "level": "middle",
    }



def hard_level_element_selection(day_list):
    """Build the element dict for one hard-level query.

    Three distinct local-constraint types are drawn (without replacement)
    from "house rule", "cuisine", "room type" and "transportation" with
    weights 0.3 / 0.1 / 0.3 / 0.3, and each drawn type gets a random value.
    The filled-in constraints are also passed to ``budget_calc``.

    The budget is the same per-trip-length base as the other levels,
    multiplied by the group size and by 0.5, then rounded to hundreds.
    """
    trip_days = random.choice(day_list)
    date_strings = [day.strftime('%Y-%m-%d') for day in select_consecutive_dates(trip_days)]

    group_sizes = random.choice([[2], [3, 4, 5, 6, 7, 8]])
    people_number = random.choice(group_sizes)

    candidate_types = ["house rule", "cuisine", "room type", "transportation"]
    weights = [0.3, 0.1, 0.3, 0.3]  # aligned with candidate_types
    origin, destination = get_org_dest(trip_days)

    constraints = dict.fromkeys(candidate_types)

    drawn_types = np.random.choice(candidate_types, size=3, replace=False, p=weights).tolist()

    # Values are drawn in the order the types were sampled.
    for kind in drawn_types:
        if kind == "transportation":
            constraints["transportation"] = random.choice(["no flight", "no self-driving"])
        elif kind == "house rule":
            constraints["house rule"] = random.choice(
                ["parties", "smoking", "children under 10", "visitors", "pets"]
            )
        elif kind == "cuisine":
            constraints["cuisine"] = random.sample(
                ["Chinese", "American", "Italian", "Mexican", "Indian", "Mediterranean", "French"], 4
            )
        elif kind == "room type":
            # Shared-room options are only offered to groups of at most two.
            if people_number <= 2:
                room_options = ["shared room", "not shared room", "private room", "entire room"]
            else:
                room_options = ["private room", "entire room"]
            constraints["room type"] = random.choice(room_options)
        elif kind == "flight time":
            # Never drawn with the current candidate list; kept for parity.
            constraints["flight time"] = random.choice(["morning", "afternoon", "evening"])

    estimate = budget_calc(
        origin,
        destination,
        days=trip_days,
        date=date_strings,
        people_number=people_number,
        local_constraint=constraints,
    )

    if trip_days == 3:
        base = (estimate["average"] + estimate["lowest"]) / 2
    elif trip_days == 5:
        base = estimate["average"]
    elif trip_days == 7:
        base = round_to_hundreds((estimate["average"] + estimate["highest"]) / 2)
    rounded_budget = round_to_hundreds(base * people_number * 0.5)

    return {
        "org": origin,
        "dest": destination,
        "days": trip_days,
        "visiting_city_number": visiting_city_map[trip_days],
        "date": date_strings,
        "people_number": people_number,
        "local_constraint": constraints,
        "budget": rounded_budget,
        "query": None,
        "level": "hard",
    }


def generate_elements(number:int, level="easy", day_list=None):
    """Generate ``number`` distinct query-element dicts for one level.

    Args:
        number: How many unique queries to produce. Values <= 0 yield ``[]``.
        level: ``"easy"``, ``"middle"`` or ``"hard"``; selects which
            ``*_level_element_selection`` function builds each query.
        day_list: Trip lengths to sample from. Defaults to ``[3, 5, 7]``.

    Returns:
        A list of ``number`` query dicts with no duplicates.

    Raises:
        ValueError: If ``level`` is not one of the three known levels and at
            least one query was requested (previously this looped forever).
    """
    # A fresh list per call avoids sharing one mutable default across calls.
    if day_list is None:
        day_list = [3, 5, 7]

    if number <= 0:
        return []

    if level not in ("easy", "middle", "hard"):
        raise ValueError(f"unknown level {level!r}; expected 'easy', 'middle' or 'hard'")

    if level == "easy":
        build_query = easy_level_element_selection
    elif level == "middle":
        build_query = middle_level_element_selection
    else:
        build_query = hard_level_element_selection

    query_list = []
    while len(query_list) < number:
        # Progress indicator: number of queries collected so far.
        print(len(query_list))
        try:
            query = build_query(day_list)
        except ValueError:
            # Best effort: a sample that cannot be turned into a query is
            # discarded and a new one is drawn.
            continue
        if query not in query_list:
            query_list.append(query)
    return query_list

def main():
    """Generate middle-level queries and append them to the annotation file.

    For each trip length (3, 5 and 7 days) 160 queries are generated and
    appended, one JSON object per line, to
    ``../data/query/final_annotation_middle.jsonl``.
    """

    # save query_list as jsonl file
    for num, day_list in zip([160,160,160], [[3],[5],[7]]):
        query_list = generate_elements(num,"middle",day_list=day_list)

        # The with-block closes the file on exit, so no explicit close() is needed.
        with open('../data/query/final_annotation_middle.jsonl', 'a+') as f:
            for query in query_list:
                json.dump(query, f)
                f.write('\n')


if __name__ == "__main__":
    main()