[2024-01-17 00:07:52,551][hydra][INFO] - model: name: EleutherAI/pythia-14m alias: pythia-14m revision: null subfolder: null precision: bf16 set_eos_to_pad: true dataset: name: gsm8k alias: gsm8k text_field: question max_length: 1024 trainer: group_by_length: false remove_unused_columns: true neftune_noise_alpha: null eval_accumulation_steps: 1 per_device_train_batch_size: 32 per_device_eval_batch_size: 20 gradient_accumulation_steps: 1 dataloader_num_workers: 8 dataloader_drop_last: false optim: adamw_torch_fused adafactor: false learning_rate: 0.0001 weight_decay: 0 adam_beta1: 0.9 adam_beta2: 0.999 adam_epsilon: 1.0e-08 max_grad_norm: 1.0 lr_scheduler_type: linear warmup_ratio: 0.0 warmup_steps: 0 num_train_epochs: 1 max_steps: -1 eval_steps: 100 output_dir: ./ logging_strategy: steps logging_first_step: true logging_steps: 1 log_level: info report_to: tensorboard logging_dir: tb_logs disable_tqdm: false push_to_hub: true save_strategy: epoch save_steps: 100 save_only_model: true seed: 42 data_seed: 42 full_determinism: true tf32: true lora: r: 64 lora_alpha: 16 bias: none task_type: CAUSAL_LM target_modules: null use_peft: true global_seed: 42 experiment_group: training run_name: pythia-14m_2024-01-17T00-07-52 ======================================================================