nguyenvulebinh
committed on
Commit
•
18b4250
1
Parent(s):
58c3693
add random print sample when eval
Browse files
main.py
CHANGED
@@ -150,7 +150,7 @@ if __name__ == "__main__":
|
|
150 |
cache_file_name=os.path.join(cache_processing_dataset_folder, 'train',
|
151 |
'cache-train-shard-{}.arrow'.format(
|
152 |
train_dataset_shard_idx))
|
153 |
-
)
|
154 |
# load test shard subset
|
155 |
test_dataset = load_prepared_dataset(os.path.join(test_dataset_root_folder,
|
156 |
'shard_{}'.format(test_dataset_shard_idx)),
|
@@ -172,9 +172,6 @@ if __name__ == "__main__":
|
|
172 |
callbacks=[BreakEachEpoch()] # Manual break end of epoch because each epoch loop over a shard
|
173 |
)
|
174 |
|
175 |
-
# training_args.num_train_epochs = epoch_idx + 1
|
176 |
-
|
177 |
-
logging.get_logger().info('Train epoch {}'.format(training_args.num_train_epochs))
|
178 |
logging.get_logger().info('Train shard idx: {} / {}'.format(train_dataset_shard_idx + 1, num_train_shards))
|
179 |
logging.get_logger().info(
|
180 |
'Valid shard idx: {} / {} sub_shard: {}'.format(test_dataset_shard_idx + 1, num_test_shards, idx_sub_shard))
|
|
|
150 |
cache_file_name=os.path.join(cache_processing_dataset_folder, 'train',
|
151 |
'cache-train-shard-{}.arrow'.format(
|
152 |
train_dataset_shard_idx))
|
153 |
+
) # .shard(1000, 0) # Remove shard split when train
|
154 |
# load test shard subset
|
155 |
test_dataset = load_prepared_dataset(os.path.join(test_dataset_root_folder,
|
156 |
'shard_{}'.format(test_dataset_shard_idx)),
|
|
|
172 |
callbacks=[BreakEachEpoch()] # Manual break end of epoch because each epoch loop over a shard
|
173 |
)
|
174 |
|
|
|
|
|
|
|
175 |
logging.get_logger().info('Train shard idx: {} / {}'.format(train_dataset_shard_idx + 1, num_train_shards))
|
176 |
logging.get_logger().info(
|
177 |
'Valid shard idx: {} / {} sub_shard: {}'.format(test_dataset_shard_idx + 1, num_test_shards, idx_sub_shard))
|