framework: bart
data_dir: ../../dataset
train_data: msd_balence
text_type: all
arch: transformer
workers: 12                          # data-loading worker processes
epochs: 4096
warmup_epochs: 125                   # learning-rate warmup epochs
start_epoch: 0
batch_size: 256
world_size: 1                        # number of distributed processes
lr: 0.0001
min_lr: 1.0e-09
rank: 0                              # rank of this process in distributed training
dist_url: tcp://localhost:12312
dist_backend: nccl
seed: null
gpu: 0
print_freq: 100                      # logging frequency
multiprocessing_distributed: false
cos: true                            # use cosine learning-rate schedule
bart_pretrain: false                 # whether to initialize from pretrained BART weights
label_smoothing: 0.1
use_early_stopping: false
eval_sample: 0
max_length: 110                      # maximum sequence length
distributed: false
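
A minimal sketch of how a YAML config like the one above is typically loaded into training code. The filename config.yaml, the load_config helper, and the Namespace-style access are illustrative assumptions, not part of this repository; PyYAML is assumed to be installed.

import argparse
import yaml

def load_config(path: str) -> argparse.Namespace:
    """Load a YAML config file into an argparse-style namespace (hypothetical helper)."""
    with open(path, "r") as f:
        cfg = yaml.safe_load(f)          # parses the "key: value" pairs into a dict
    return argparse.Namespace(**cfg)     # allows attribute access, e.g. args.lr

args = load_config("config.yaml")        # hypothetical filename
print(args.lr, args.batch_size, args.epochs)   # 0.0001 256 4096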
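With distributed: false, single-process training is implied. If it were enabled, the dist_url, dist_backend, world_size, and rank fields map onto PyTorch's standard process-group initialization. A hedged illustration of that conventional usage, reusing args from the sketch above; this is not necessarily this repository's exact code.

import torch.distributed as dist

if args.distributed:                     # false in this config, so this branch is skipped
    dist.init_process_group(
        backend=args.dist_backend,       # "nccl"
        init_method=args.dist_url,       # "tcp://localhost:12312"
        world_size=args.world_size,      # 1
        rank=args.rank,                  # 0
    )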