---
# Training configuration (Hydra-style): fine-tune a causal LM with optional LoRA.
# Sections: model (checkpoint to load), dataset (data source), trainer
# (HuggingFace TrainingArguments), lora (PEFT LoraConfig), plus top-level
# experiment settings. `${...}` values are Hydra interpolations resolved at
# compose time.

model:
  name: EleutherAI/pythia-14m
  alias: pythia-14m
  # null → use the default branch / repo root of the checkpoint.
  revision: null
  subfolder: null
  precision: bf16
  # Reuse the EOS token as the padding token (models like Pythia ship no pad token).
  set_eos_to_pad: true

dataset:
  name: gsm8k
  # Alias defaults to the dataset name via interpolation.
  alias: ${dataset.name}
  text_field: question
  max_length: 1024

# HuggingFace TrainingArguments.
trainer:
  group_by_length: false
  remove_unused_columns: true
  # null disables NEFTune noise injection.
  neftune_noise_alpha: null
  eval_accumulation_steps: 1
  per_device_train_batch_size: 32
  per_device_eval_batch_size: 20
  gradient_accumulation_steps: 1
  dataloader_num_workers: 8
  dataloader_drop_last: false
  optim: adamw_torch_fused
  adafactor: false
  learning_rate: 0.0001
  weight_decay: 0
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  max_grad_norm: 1.0
  lr_scheduler_type: linear
  warmup_ratio: 0.0
  warmup_steps: 0
  num_train_epochs: 1
  # -1 → derive total steps from num_train_epochs.
  max_steps: -1
  eval_steps: 100
  output_dir: ./
  logging_strategy: steps
  logging_first_step: true
  logging_steps: 1
  log_level: info
  report_to: tensorboard
  logging_dir: tb_logs
  disable_tqdm: false
  push_to_hub: true
  save_strategy: epoch
  # Only relevant when save_strategy is steps; kept in sync with eval cadence.
  save_steps: ${trainer.eval_steps}
  # Skip optimizer/scheduler state in checkpoints (smaller saves, no resume).
  save_only_model: true
  seed: ${global_seed}
  data_seed: ${global_seed}
  full_determinism: true
  tf32: true

# PEFT LoraConfig (applied only when use_peft is true).
lora:
  r: 64
  lora_alpha: 16
  # PEFT expects the literal string "none" here (not YAML null):
  # train no bias parameters.
  bias: none
  task_type: CAUSAL_LM
  # null → let PEFT pick the default target modules for the architecture.
  target_modules: null

use_peft: true
global_seed: 42
experiment_group: training
# Timestamped run name, e.g. pythia-14m_2024-01-31T12-00-00.
run_name: ${model.alias}_${now:%Y-%m-%d}T${now:%H-%M-%S}