framework: bart                    # sequence-to-sequence framework
data_dir: ../../dataset            # root directory of the dataset
train_data: msd_balence            # name of the training split
text_type: all
arch: transformer
workers: 8                         # data-loading worker processes
epochs: 100                        # total training epochs
warmup_epochs: 20                  # learning-rate warmup epochs
start_epoch: 0                     # starting epoch (non-zero when resuming)
batch_size: 64
world_size: 1                      # total number of distributed processes
lr: 0.0001                         # base learning rate
min_lr: 1.0e-09                    # floor for the learning-rate schedule
rank: 0                            # rank of this process in distributed training
dist_url: tcp://localhost:12312    # URL used to initialize distributed training
dist_backend: nccl                 # PyTorch distributed backend
seed: null                         # random seed (null = not fixed)
gpu: 1                             # GPU index to train on
print_freq: 10                     # logging frequency, in iterations
multiprocessing_distributed: false # if true, launch one process per GPU
cos: true                          # cosine learning-rate decay (sketched below)
bart_pretrain: false               # use pretrained BART weights (assumed from the flag name)
label_smoothing: 0.1               # label-smoothing factor for the cross-entropy loss
use_early_stopping: false
eval_sample: 64                    # number of samples drawn during evaluation (assumed)
max_length: 128                    # maximum sequence length
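
These fields mirror the argparse-style options of a typical PyTorch training script. As a minimal sketch (assuming PyYAML; `config.yaml` and `load_config` are illustrative names, not the repository's actual loader), the file can be read and exposed as attribute-style arguments:

```python
# Minimal loader sketch -- assumes PyYAML; "config.yaml" and load_config
# are illustrative names, not part of the repository.
import argparse
import yaml

def load_config(path: str) -> argparse.Namespace:
    with open(path) as f:
        cfg = yaml.safe_load(f)        # parse the YAML mapping into a dict
    return argparse.Namespace(**cfg)   # attribute access: args.lr, args.epochs, ...

if __name__ == "__main__":
    args = load_config("config.yaml")
    print(args.framework, args.batch_size, args.lr)  # bart 64 0.0001
```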
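
The `lr`, `min_lr`, `warmup_epochs`, `epochs`, and `cos` fields suggest a linear-warmup-then-cosine learning-rate schedule, a common pattern in PyTorch training scripts. The sketch below only illustrates that assumption; the schedule actually implemented in the codebase may differ.

```python
# Illustrative warmup + cosine schedule implied by the fields above
# (assumption; the repository's scheduler may differ in details).
import math

def lr_at_epoch(epoch: int, base_lr: float = 1e-4, min_lr: float = 1e-9,
                warmup_epochs: int = 20, epochs: int = 100) -> float:
    if epoch < warmup_epochs:
        # linear warmup from 0 up to the base learning rate
        return base_lr * epoch / warmup_epochs
    # cosine decay from base_lr down to min_lr over the remaining epochs
    progress = (epoch - warmup_epochs) / (epochs - warmup_epochs)
    return min_lr + (base_lr - min_lr) * 0.5 * (1.0 + math.cos(math.pi * progress))

for epoch in (0, 10, 20, 60, 99):
    print(epoch, f"{lr_at_epoch(epoch):.2e}")
```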