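# File-viewer page header captured with this file (not part of the config);
# kept verbatim as comments so the document parses as YAML:
#   oliverdk's picture
#   End of training
#   a7ef0da verified
#   raw
#   history blame contribute delete
#   494 Bytes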
# Training run configuration.
#
# NOTE(review): the nesting below is reconstructed. In the copy under review
# the section keys (`model`, `model_config_params`, `hparams`) had no indented
# children, which makes them null and hoists every setting to the top level.
# Confirm the layout against the consumer's schema.

# Model and dataset selection.
model:
  dataset_name: redwoodresearch/diamonds-seed1
  model_type: codegen
  pretrained_model_name: Salesforce/codegen-350M-mono
  max_length: 1024
  model_config_params:
    sensor_loc_type: locs_from_token
    # The quotes preserve the leading space in the value; do not strip them.
    sensor_token: ' omit'

# Optimisation, schedule and batching settings.
hparams:
  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders also resolve it as a float rather than a string.
  learning_rate: 2.0e-05
  weight_decay: 0.02
  lr_scheduler_type: cosine
  warmup_steps: 64
  # NOTE(review): presumably reached from the per-device batch size via
  # gradient accumulation and/or multiple devices; verify in the trainer.
  effective_batch_size: 32
  num_train_epochs: 5
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
  fp16: true

# NOTE(review): assumed to be run-level options rather than members of
# `hparams`; TODO confirm.
dataset_len: null
push_to_hub: true