{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5805515239477503,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005805515239477504,
      "eval_loss": 1.2620487213134766,
      "eval_runtime": 8.1244,
      "eval_samples_per_second": 35.695,
      "eval_steps_per_second": 4.554,
      "step": 1
    },
    {
      "epoch": 0.01741654571843251,
      "grad_norm": 0.4328065812587738,
      "learning_rate": 1.5e-05,
      "loss": 1.2349,
      "step": 3
    },
    {
      "epoch": 0.03483309143686502,
      "grad_norm": 0.46443697810173035,
      "learning_rate": 3e-05,
      "loss": 1.2858,
      "step": 6
    },
    {
      "epoch": 0.05224963715529753,
      "grad_norm": 0.44606202840805054,
      "learning_rate": 4.5e-05,
      "loss": 1.2235,
      "step": 9
    },
    {
      "epoch": 0.05224963715529753,
      "eval_loss": 1.2377574443817139,
      "eval_runtime": 8.1496,
      "eval_samples_per_second": 35.584,
      "eval_steps_per_second": 4.54,
      "step": 9
    },
    {
      "epoch": 0.06966618287373004,
      "grad_norm": 0.5522386431694031,
      "learning_rate": 4.993910125649561e-05,
      "loss": 1.2714,
      "step": 12
    },
    {
      "epoch": 0.08708272859216255,
      "grad_norm": 0.5637324452400208,
      "learning_rate": 4.962019382530521e-05,
      "loss": 1.1623,
      "step": 15
    },
    {
      "epoch": 0.10449927431059507,
      "grad_norm": 0.5763392448425293,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 1.0162,
      "step": 18
    },
    {
      "epoch": 0.10449927431059507,
      "eval_loss": 1.0046803951263428,
      "eval_runtime": 8.16,
      "eval_samples_per_second": 35.539,
      "eval_steps_per_second": 4.534,
      "step": 18
    },
    {
      "epoch": 0.12191582002902758,
      "grad_norm": 0.5673230886459351,
      "learning_rate": 4.817959636416969e-05,
      "loss": 0.9585,
      "step": 21
    },
    {
      "epoch": 0.13933236574746008,
      "grad_norm": 0.47465190291404724,
      "learning_rate": 4.707368982147318e-05,
      "loss": 0.8755,
      "step": 24
    },
    {
      "epoch": 0.1567489114658926,
      "grad_norm": 0.5033220052719116,
      "learning_rate": 4.572593931387604e-05,
      "loss": 0.788,
      "step": 27
    },
    {
      "epoch": 0.1567489114658926,
      "eval_loss": 0.743035078048706,
      "eval_runtime": 8.1945,
      "eval_samples_per_second": 35.389,
      "eval_steps_per_second": 4.515,
      "step": 27
    },
    {
      "epoch": 0.1741654571843251,
      "grad_norm": 0.4010712802410126,
      "learning_rate": 4.415111107797445e-05,
      "loss": 0.6837,
      "step": 30
    },
    {
      "epoch": 0.19158200290275762,
      "grad_norm": 0.38208135962486267,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 0.6335,
      "step": 33
    },
    {
      "epoch": 0.20899854862119013,
      "grad_norm": 0.3118175268173218,
      "learning_rate": 4.039153688314145e-05,
      "loss": 0.5961,
      "step": 36
    },
    {
      "epoch": 0.20899854862119013,
      "eval_loss": 0.5937825441360474,
      "eval_runtime": 8.2105,
      "eval_samples_per_second": 35.32,
      "eval_steps_per_second": 4.506,
      "step": 36
    },
    {
      "epoch": 0.22641509433962265,
      "grad_norm": 0.26881372928619385,
      "learning_rate": 3.824798160583012e-05,
      "loss": 0.61,
      "step": 39
    },
    {
      "epoch": 0.24383164005805516,
      "grad_norm": 0.2959866225719452,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 0.5413,
      "step": 42
    },
    {
      "epoch": 0.2612481857764877,
      "grad_norm": 0.31090644001960754,
      "learning_rate": 3.355050358314172e-05,
      "loss": 0.5359,
      "step": 45
    },
    {
      "epoch": 0.2612481857764877,
      "eval_loss": 0.5296854972839355,
      "eval_runtime": 8.2316,
      "eval_samples_per_second": 35.23,
      "eval_steps_per_second": 4.495,
      "step": 45
    },
    {
      "epoch": 0.27866473149492016,
      "grad_norm": 0.30757585167884827,
      "learning_rate": 3.104804738999169e-05,
      "loss": 0.4991,
      "step": 48
    },
    {
      "epoch": 0.2960812772133527,
      "grad_norm": 0.2986898720264435,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 0.5406,
      "step": 51
    },
    {
      "epoch": 0.3134978229317852,
      "grad_norm": 0.29928526282310486,
      "learning_rate": 2.587248741756253e-05,
      "loss": 0.5209,
      "step": 54
    },
    {
      "epoch": 0.3134978229317852,
      "eval_loss": 0.499483197927475,
      "eval_runtime": 8.228,
      "eval_samples_per_second": 35.245,
      "eval_steps_per_second": 4.497,
      "step": 54
    },
    {
      "epoch": 0.3309143686502177,
      "grad_norm": 0.3250415325164795,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 0.5339,
      "step": 57
    },
    {
      "epoch": 0.3483309143686502,
      "grad_norm": 0.2665368616580963,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 0.4771,
      "step": 60
    },
    {
      "epoch": 0.36574746008708275,
      "grad_norm": 0.2685997784137726,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 0.4493,
      "step": 63
    },
    {
      "epoch": 0.36574746008708275,
      "eval_loss": 0.48386040329933167,
      "eval_runtime": 8.2199,
      "eval_samples_per_second": 35.28,
      "eval_steps_per_second": 4.501,
      "step": 63
    },
    {
      "epoch": 0.38316400580551524,
      "grad_norm": 0.3184368908405304,
      "learning_rate": 1.56348351646022e-05,
      "loss": 0.4958,
      "step": 66
    },
    {
      "epoch": 0.4005805515239477,
      "grad_norm": 0.2603450417518616,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 0.4831,
      "step": 69
    },
    {
      "epoch": 0.41799709724238027,
      "grad_norm": 0.3131684362888336,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 0.4193,
      "step": 72
    },
    {
      "epoch": 0.41799709724238027,
      "eval_loss": 0.4739016890525818,
      "eval_runtime": 8.222,
      "eval_samples_per_second": 35.271,
      "eval_steps_per_second": 4.5,
      "step": 72
    },
    {
      "epoch": 0.43541364296081275,
      "grad_norm": 0.2975787818431854,
      "learning_rate": 8.930309757836517e-06,
      "loss": 0.4181,
      "step": 75
    },
    {
      "epoch": 0.4528301886792453,
      "grad_norm": 0.3197750747203827,
      "learning_rate": 7.016504991533726e-06,
      "loss": 0.4859,
      "step": 78
    },
    {
      "epoch": 0.4702467343976778,
      "grad_norm": 0.29346856474876404,
      "learning_rate": 5.299731159831953e-06,
      "loss": 0.4102,
      "step": 81
    },
    {
      "epoch": 0.4702467343976778,
      "eval_loss": 0.46982458233833313,
      "eval_runtime": 8.2212,
      "eval_samples_per_second": 35.274,
      "eval_steps_per_second": 4.501,
      "step": 81
    },
    {
      "epoch": 0.4876632801161103,
      "grad_norm": 0.30700182914733887,
      "learning_rate": 3.798797596089351e-06,
      "loss": 0.5111,
      "step": 84
    },
    {
      "epoch": 0.5050798258345428,
      "grad_norm": 0.3279879689216614,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 0.4594,
      "step": 87
    },
    {
      "epoch": 0.5224963715529753,
      "grad_norm": 0.2903614938259125,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 0.4378,
      "step": 90
    },
    {
      "epoch": 0.5224963715529753,
      "eval_loss": 0.4680546224117279,
      "eval_runtime": 8.2219,
      "eval_samples_per_second": 35.272,
      "eval_steps_per_second": 4.5,
      "step": 90
    },
    {
      "epoch": 0.5399129172714079,
      "grad_norm": 0.32795748114585876,
      "learning_rate": 7.426068431000882e-07,
      "loss": 0.503,
      "step": 93
    },
    {
      "epoch": 0.5573294629898403,
      "grad_norm": 0.3038257658481598,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 0.4698,
      "step": 96
    },
    {
      "epoch": 0.5747460087082729,
      "grad_norm": 0.30126094818115234,
      "learning_rate": 1.522932452260595e-08,
      "loss": 0.4015,
      "step": 99
    },
    {
      "epoch": 0.5747460087082729,
      "eval_loss": 0.4675796627998352,
      "eval_runtime": 8.2149,
      "eval_samples_per_second": 35.302,
      "eval_steps_per_second": 4.504,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.20235311726592e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}