Spaces:
Runtime error
Runtime error
alistairmcleay
committed on
Commit
•
19cbc2a
1
Parent(s):
76277a1
Fixing data paths
Browse files
src/crazyneuraluser/UBAR_code/config.py
CHANGED
@@ -10,29 +10,25 @@ class _Config:
|
|
10 |
def _multiwoz_ubar_init(self):
|
11 |
self.gpt_path = "distilgpt2"
|
12 |
|
13 |
-
self.vocab_path_train = "
|
14 |
self.vocab_path_eval = None
|
15 |
-
self.data_path = "
|
16 |
self.data_file = "data_for_ubar.json"
|
17 |
-
self.dev_list = "
|
18 |
-
self.test_list = "
|
19 |
self.dbs = {
|
20 |
-
"attraction": "
|
21 |
-
"hospital": "
|
22 |
-
"hotel": "
|
23 |
-
"police": "
|
24 |
-
"restaurant": "
|
25 |
-
"taxi": "
|
26 |
-
"train": "
|
27 |
}
|
28 |
-
self.glove_path = "
|
29 |
-
self.domain_file_path = "
|
30 |
-
self.slot_value_set_path =
|
31 |
-
|
32 |
-
)
|
33 |
-
self.multi_acts_path = (
|
34 |
-
"cambridge-masters-project/data/preprocessed/UBAR/multi-woz-processed/multi_act_mapping_train.json"
|
35 |
-
)
|
36 |
self.exp_path = "to be generated"
|
37 |
self.log_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
|
38 |
|
|
|
10 |
def _multiwoz_ubar_init(self):
|
11 |
self.gpt_path = "distilgpt2"
|
12 |
|
13 |
+
self.vocab_path_train = "data/preprocessed/UBAR/multi-woz-processed/vocab"
|
14 |
self.vocab_path_eval = None
|
15 |
+
self.data_path = "data/preprocessed/UBAR/multi-woz-processed/"
|
16 |
self.data_file = "data_for_ubar.json"
|
17 |
+
self.dev_list = "data/raw/UBAR/multi-woz/valListFile.json"
|
18 |
+
self.test_list = "data/raw/UBAR/multi-woz/testListFile.json"
|
19 |
self.dbs = {
|
20 |
+
"attraction": "data/preprocessed/UBAR/db_processed/attraction_db_processed.json",
|
21 |
+
"hospital": "data/preprocessed/UBAR/db_processed/hospital_db_processed.json",
|
22 |
+
"hotel": "data/preprocessed/UBAR/db_processed/hotel_db_processed.json",
|
23 |
+
"police": "data/preprocessed/UBAR/db_processed/police_db_processed.json",
|
24 |
+
"restaurant": "data/preprocessed/UBAR/db_processed/restaurant_db_processed.json",
|
25 |
+
"taxi": "data/preprocessed/UBAR/db_processed/taxi_db_processed.json",
|
26 |
+
"train": "data/preprocessed/UBAR/db_processed/train_db_processed.json",
|
27 |
}
|
28 |
+
self.glove_path = "data/glove/glove.6B.50d.txt"
|
29 |
+
self.domain_file_path = "data/preprocessed/UBAR/multi-woz-processed/domain_files.json"
|
30 |
+
self.slot_value_set_path = "data/preprocessed/UBAR/db_processed/value_set_processed.json"
|
31 |
+
self.multi_acts_path = "data/preprocessed/UBAR/multi-woz-processed/multi_act_mapping_train.json"
|
|
|
|
|
|
|
|
|
32 |
self.exp_path = "to be generated"
|
33 |
self.log_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
|
34 |
|
src/crazyneuraluser/UBAR_code/reader.py
CHANGED
@@ -301,7 +301,7 @@ class MultiWozReader(_ReaderBase):
|
|
301 |
self.exp_files[fn.replace(".json", "")] = 1
|
302 |
#
|
303 |
|
304 |
-
self._load_data()
|
305 |
|
306 |
if cfg.limit_bspn_vocab:
|
307 |
self.bspn_masks = self._construct_bspn_constraint()
|
@@ -497,7 +497,7 @@ class MultiWozReader(_ReaderBase):
|
|
497 |
|
498 |
if os.path.exists(encoded_file):
|
499 |
logging.info("Reading encoded data from {}".format(encoded_file))
|
500 |
-
self.data = json.loads(open(cfg.data_path + cfg.data_file, "r", encoding="utf-8").read().lower())
|
501 |
encoded_data = json.loads(open(encoded_file, "r", encoding="utf-8").read())
|
502 |
self.train = encoded_data["train"]
|
503 |
self.dev = encoded_data["dev"]
|
@@ -505,7 +505,7 @@ class MultiWozReader(_ReaderBase):
|
|
505 |
else:
|
506 |
logging.info("Encoding data now and save the encoded data in {}".format(encoded_file))
|
507 |
# not exists, encode data and save
|
508 |
-
self.data = json.loads(open(cfg.data_path + cfg.data_file, "r", encoding="utf-8").read().lower())
|
509 |
self.train, self.dev, self.test = [], [], []
|
510 |
for fn, dial in self.data.items():
|
511 |
if ".json" in fn:
|
|
|
301 |
self.exp_files[fn.replace(".json", "")] = 1
|
302 |
#
|
303 |
|
304 |
+
# self._load_data()
|
305 |
|
306 |
if cfg.limit_bspn_vocab:
|
307 |
self.bspn_masks = self._construct_bspn_constraint()
|
|
|
497 |
|
498 |
if os.path.exists(encoded_file):
|
499 |
logging.info("Reading encoded data from {}".format(encoded_file))
|
500 |
+
# self.data = json.loads(open(cfg.data_path + cfg.data_file, "r", encoding="utf-8").read().lower())
|
501 |
encoded_data = json.loads(open(encoded_file, "r", encoding="utf-8").read())
|
502 |
self.train = encoded_data["train"]
|
503 |
self.dev = encoded_data["dev"]
|
|
|
505 |
else:
|
506 |
logging.info("Encoding data now and save the encoded data in {}".format(encoded_file))
|
507 |
# not exists, encode data and save
|
508 |
+
# self.data = json.loads(open(cfg.data_path + cfg.data_file, "r", encoding="utf-8").read().lower())
|
509 |
self.train, self.dev, self.test = [], [], []
|
510 |
for fn, dial in self.data.items():
|
511 |
if ".json" in fn:
|