{ "best_metric": 0.8720703125, "best_model_checkpoint": "./alma-7b-parallel-ft-lora-canto/checkpoint-589", "epoch": 1.99673735725938, "global_step": 612, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 0.0006831300510639731, "loss": 1.8545, "step": 31 }, { "epoch": 0.1, "eval_loss": 1.275390625, "eval_runtime": 161.389, "eval_samples_per_second": 26.049, "eval_steps_per_second": 1.63, "step": 31 }, { "epoch": 0.2, "learning_rate": 0.0003944053188733077, "loss": 1.2078, "step": 62 }, { "epoch": 0.2, "eval_loss": 1.1513671875, "eval_runtime": 161.4937, "eval_samples_per_second": 26.032, "eval_steps_per_second": 1.629, "step": 62 }, { "epoch": 0.3, "learning_rate": 0.00030348848933344196, "loss": 1.1077, "step": 93 }, { "epoch": 0.3, "eval_loss": 1.0869140625, "eval_runtime": 161.573, "eval_samples_per_second": 26.019, "eval_steps_per_second": 1.628, "step": 93 }, { "epoch": 0.4, "learning_rate": 0.00025577443333465435, "loss": 1.067, "step": 124 }, { "epoch": 0.4, "eval_loss": 1.0439453125, "eval_runtime": 161.4591, "eval_samples_per_second": 26.038, "eval_steps_per_second": 1.629, "step": 124 }, { "epoch": 0.51, "learning_rate": 0.0002252213082307254, "loss": 1.0179, "step": 155 }, { "epoch": 0.51, "eval_loss": 1.015625, "eval_runtime": 161.6031, "eval_samples_per_second": 26.014, "eval_steps_per_second": 1.627, "step": 155 }, { "epoch": 0.61, "learning_rate": 0.00020351933162035313, "loss": 0.9852, "step": 186 }, { "epoch": 0.61, "eval_loss": 0.98876953125, "eval_runtime": 161.723, "eval_samples_per_second": 25.995, "eval_steps_per_second": 1.626, "step": 186 }, { "epoch": 0.71, "learning_rate": 0.0001870828693386971, "loss": 0.9682, "step": 217 }, { "epoch": 0.71, "eval_loss": 0.97314453125, "eval_runtime": 161.5091, "eval_samples_per_second": 26.029, "eval_steps_per_second": 1.628, "step": 217 }, { "epoch": 0.81, "learning_rate": 0.00017407765595569785, "loss": 0.9565, "step": 248 }, { "epoch": 0.81, "eval_loss": 0.95751953125, "eval_runtime": 161.4684, "eval_samples_per_second": 26.036, "eval_steps_per_second": 1.629, "step": 248 }, { "epoch": 0.91, "learning_rate": 0.00016345506187300654, "loss": 0.9322, "step": 279 }, { "epoch": 0.91, "eval_loss": 0.94091796875, "eval_runtime": 161.6846, "eval_samples_per_second": 26.001, "eval_steps_per_second": 1.627, "step": 279 }, { "epoch": 1.01, "learning_rate": 0.0001545664419689318, "loss": 0.9079, "step": 310 }, { "epoch": 1.01, "eval_loss": 0.93212890625, "eval_runtime": 159.6077, "eval_samples_per_second": 26.34, "eval_steps_per_second": 1.648, "step": 310 }, { "epoch": 1.11, "learning_rate": 0.00014698618394803282, "loss": 0.8272, "step": 341 }, { "epoch": 1.11, "eval_loss": 0.92333984375, "eval_runtime": 159.6433, "eval_samples_per_second": 26.334, "eval_steps_per_second": 1.647, "step": 341 }, { "epoch": 1.21, "learning_rate": 0.0001404218994998819, "loss": 0.7966, "step": 372 }, { "epoch": 1.21, "eval_loss": 0.92041015625, "eval_runtime": 159.6168, "eval_samples_per_second": 26.338, "eval_steps_per_second": 1.648, "step": 372 }, { "epoch": 1.31, "learning_rate": 0.00013466519604525415, "loss": 0.7947, "step": 403 }, { "epoch": 1.31, "eval_loss": 0.9150390625, "eval_runtime": 159.659, "eval_samples_per_second": 26.331, "eval_steps_per_second": 1.647, "step": 403 }, { "epoch": 1.42, "learning_rate": 0.00012956299912940142, "loss": 0.8089, "step": 434 }, { "epoch": 1.42, "eval_loss": 0.9052734375, "eval_runtime": 159.599, "eval_samples_per_second": 26.341, "eval_steps_per_second": 1.648, "step": 434 }, { "epoch": 1.52, "learning_rate": 0.000125, "loss": 0.8113, "step": 465 }, { "epoch": 1.52, "eval_loss": 0.89990234375, "eval_runtime": 159.5797, "eval_samples_per_second": 26.344, "eval_steps_per_second": 1.648, "step": 465 }, { "epoch": 1.62, "learning_rate": 0.00012088746297956931, "loss": 0.7849, "step": 496 }, { "epoch": 1.62, "eval_loss": 0.89111328125, "eval_runtime": 159.5898, "eval_samples_per_second": 26.343, "eval_steps_per_second": 1.648, "step": 496 }, { "epoch": 1.72, "learning_rate": 0.00011715583722580123, "loss": 0.7796, "step": 527 }, { "epoch": 1.72, "eval_loss": 0.8828125, "eval_runtime": 159.6567, "eval_samples_per_second": 26.331, "eval_steps_per_second": 1.647, "step": 527 }, { "epoch": 1.82, "learning_rate": 0.00011374973339937476, "loss": 0.7768, "step": 558 }, { "epoch": 1.82, "eval_loss": 0.87548828125, "eval_runtime": 159.5528, "eval_samples_per_second": 26.349, "eval_steps_per_second": 1.648, "step": 558 }, { "epoch": 1.92, "learning_rate": 0.00011062441971717747, "loss": 0.7784, "step": 589 }, { "epoch": 1.92, "eval_loss": 0.8720703125, "eval_runtime": 159.542, "eval_samples_per_second": 26.35, "eval_steps_per_second": 1.648, "step": 589 }, { "epoch": 2.0, "step": 612, "total_flos": 7.958718731863982e+17, "train_loss": 0.9485874550015319, "train_runtime": 6936.4877, "train_samples_per_second": 5.654, "train_steps_per_second": 0.088 } ], "max_steps": 612, "num_train_epochs": 2, "total_flos": 7.958718731863982e+17, "trial_name": null, "trial_params": null }