# Model / training configuration.
#
# NOTE(review): this file previously carried a trailing " | |" on every line
# (table-extraction residue) and had lost all indentation. The residue has been
# removed and the nesting below reconstructed from the empty-valued parent keys
# (`model`, `model_config_params`, `hparams`). Confirm the layout against the
# loader that consumes this file.

model:
  dataset_name: redwoodresearch/diamonds-seed1
  model_type: codegen
  pretrained_model_name: Salesforce/codegen-350M-mono
  max_length: 1024
  model_config_params:
    sensor_loc_type: locs_from_token
    # Quoted so the leading space is kept as part of the value.
    sensor_token: ' omit'

hparams:
  learning_rate: 2.0e-05
  weight_decay: 0.02
  lr_scheduler_type: cosine
  warmup_steps: 64
  # NOTE(review): presumably the loader reconciles this with the per-device
  # batch sizes below (e.g. via gradient accumulation) — TODO confirm.
  effective_batch_size: 32
  num_train_epochs: 5
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
  fp16: true

# NOTE(review): the flattened original does not show whether the two keys below
# belong under `hparams` or at the top level; they are placed at the top level
# here — verify against the config schema.
dataset_len: null
push_to_hub: true