|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 23.529411764705884,
  "eval_steps": 500,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 0.20885063707828522,
      "learning_rate": 4.9980725906018074e-05,
      "loss": 0.8318,
      "num_input_tokens_seen": 121824,
      "step": 5
    },
    {
      "epoch": 1.1764705882352942,
      "grad_norm": 0.21794493496418,
      "learning_rate": 4.99229333433282e-05,
      "loss": 0.7891,
      "num_input_tokens_seen": 239760,
      "step": 10
    },
    {
      "epoch": 1.7647058823529411,
      "grad_norm": 0.20168891549110413,
      "learning_rate": 4.982671142387316e-05,
      "loss": 0.7678,
      "num_input_tokens_seen": 364912,
      "step": 15
    },
    {
      "epoch": 2.3529411764705883,
      "grad_norm": 0.20661190152168274,
      "learning_rate": 4.9692208514878444e-05,
      "loss": 0.728,
      "num_input_tokens_seen": 487440,
      "step": 20
    },
    {
      "epoch": 2.9411764705882355,
      "grad_norm": 0.2073347568511963,
      "learning_rate": 4.951963201008076e-05,
      "loss": 0.7364,
      "num_input_tokens_seen": 607888,
      "step": 25
    },
    {
      "epoch": 3.5294117647058822,
      "grad_norm": 0.19631442427635193,
      "learning_rate": 4.9309248009941914e-05,
      "loss": 0.7217,
      "num_input_tokens_seen": 728656,
      "step": 30
    },
    {
      "epoch": 4.117647058823529,
      "grad_norm": 0.22293810546398163,
      "learning_rate": 4.906138091134118e-05,
      "loss": 0.6901,
      "num_input_tokens_seen": 849216,
      "step": 35
    },
    {
      "epoch": 4.705882352941177,
      "grad_norm": 0.2156902402639389,
      "learning_rate": 4.877641290737884e-05,
      "loss": 0.6761,
      "num_input_tokens_seen": 971440,
      "step": 40
    },
    {
      "epoch": 5.294117647058823,
      "grad_norm": 0.22460030019283295,
      "learning_rate": 4.8454783398062106e-05,
      "loss": 0.6601,
      "num_input_tokens_seen": 1091264,
      "step": 45
    },
    {
      "epoch": 5.882352941176471,
      "grad_norm": 0.2591679096221924,
      "learning_rate": 4.8096988312782174e-05,
      "loss": 0.6439,
      "num_input_tokens_seen": 1211184,
      "step": 50
    },
    {
      "epoch": 6.470588235294118,
      "grad_norm": 0.26881489157676697,
      "learning_rate": 4.7703579345627035e-05,
      "loss": 0.6181,
      "num_input_tokens_seen": 1334112,
      "step": 55
    },
    {
      "epoch": 7.0588235294117645,
      "grad_norm": 0.3284054100513458,
      "learning_rate": 4.72751631047092e-05,
      "loss": 0.6145,
      "num_input_tokens_seen": 1454512,
      "step": 60
    },
    {
      "epoch": 7.647058823529412,
      "grad_norm": 0.2977285385131836,
      "learning_rate": 4.681240017681993e-05,
      "loss": 0.5834,
      "num_input_tokens_seen": 1576640,
      "step": 65
    },
    {
      "epoch": 8.235294117647058,
      "grad_norm": 0.3388771116733551,
      "learning_rate": 4.6316004108852305e-05,
      "loss": 0.5632,
      "num_input_tokens_seen": 1698624,
      "step": 70
    },
    {
      "epoch": 8.823529411764707,
      "grad_norm": 0.3815699815750122,
      "learning_rate": 4.5786740307563636e-05,
      "loss": 0.5549,
      "num_input_tokens_seen": 1818688,
      "step": 75
    },
    {
      "epoch": 9.411764705882353,
      "grad_norm": 0.37038519978523254,
      "learning_rate": 4.522542485937369e-05,
      "loss": 0.5151,
      "num_input_tokens_seen": 1942112,
      "step": 80
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.4679271876811981,
      "learning_rate": 4.463292327201862e-05,
      "loss": 0.5147,
      "num_input_tokens_seen": 2061552,
      "step": 85
    },
    {
      "epoch": 10.588235294117647,
      "grad_norm": 0.4134647846221924,
      "learning_rate": 4.401014914000078e-05,
      "loss": 0.4635,
      "num_input_tokens_seen": 2185344,
      "step": 90
    },
    {
      "epoch": 11.176470588235293,
      "grad_norm": 0.45239707827568054,
      "learning_rate": 4.335806273589214e-05,
      "loss": 0.4585,
      "num_input_tokens_seen": 2306256,
      "step": 95
    },
    {
      "epoch": 11.764705882352942,
      "grad_norm": 0.5336123704910278,
      "learning_rate": 4.267766952966369e-05,
      "loss": 0.4291,
      "num_input_tokens_seen": 2426592,
      "step": 100
    },
    {
      "epoch": 12.352941176470589,
      "grad_norm": 0.5823401212692261,
      "learning_rate": 4.197001863832355e-05,
      "loss": 0.3997,
      "num_input_tokens_seen": 2548672,
      "step": 105
    },
    {
      "epoch": 12.941176470588236,
      "grad_norm": 0.5824088454246521,
      "learning_rate": 4.123620120825459e-05,
      "loss": 0.3797,
      "num_input_tokens_seen": 2667984,
      "step": 110
    },
    {
      "epoch": 13.529411764705882,
      "grad_norm": 0.7273723483085632,
      "learning_rate": 4.047734873274586e-05,
      "loss": 0.3412,
      "num_input_tokens_seen": 2791904,
      "step": 115
    },
    {
      "epoch": 14.117647058823529,
      "grad_norm": 0.6384756565093994,
      "learning_rate": 3.969463130731183e-05,
      "loss": 0.3298,
      "num_input_tokens_seen": 2910560,
      "step": 120
    },
    {
      "epoch": 14.705882352941176,
      "grad_norm": 0.684781014919281,
      "learning_rate": 3.888925582549006e-05,
      "loss": 0.2863,
      "num_input_tokens_seen": 3034368,
      "step": 125
    },
    {
      "epoch": 15.294117647058824,
      "grad_norm": 0.7853628396987915,
      "learning_rate": 3.8062464117898724e-05,
      "loss": 0.2738,
      "num_input_tokens_seen": 3153984,
      "step": 130
    },
    {
      "epoch": 15.882352941176471,
      "grad_norm": 0.7987646460533142,
      "learning_rate": 3.721553103742388e-05,
      "loss": 0.2367,
      "num_input_tokens_seen": 3278336,
      "step": 135
    },
    {
      "epoch": 16.470588235294116,
      "grad_norm": 0.74590665102005,
      "learning_rate": 3.634976249348867e-05,
      "loss": 0.2189,
      "num_input_tokens_seen": 3398224,
      "step": 140
    },
    {
      "epoch": 17.058823529411764,
      "grad_norm": 0.8422712683677673,
      "learning_rate": 3.54664934384357e-05,
      "loss": 0.1971,
      "num_input_tokens_seen": 3519168,
      "step": 145
    },
    {
      "epoch": 17.647058823529413,
      "grad_norm": 0.8479442000389099,
      "learning_rate": 3.456708580912725e-05,
      "loss": 0.1705,
      "num_input_tokens_seen": 3641392,
      "step": 150
    },
    {
      "epoch": 18.235294117647058,
      "grad_norm": 0.8197467923164368,
      "learning_rate": 3.365292642693732e-05,
      "loss": 0.1454,
      "num_input_tokens_seen": 3764240,
      "step": 155
    },
    {
      "epoch": 18.823529411764707,
      "grad_norm": 1.0131207704544067,
      "learning_rate": 3.272542485937369e-05,
      "loss": 0.1387,
      "num_input_tokens_seen": 3882896,
      "step": 160
    },
    {
      "epoch": 19.41176470588235,
      "grad_norm": 0.858586311340332,
      "learning_rate": 3.178601124662686e-05,
      "loss": 0.1191,
      "num_input_tokens_seen": 4004560,
      "step": 165
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.9852003455162048,
      "learning_rate": 3.083613409639764e-05,
      "loss": 0.1101,
      "num_input_tokens_seen": 4127360,
      "step": 170
    },
    {
      "epoch": 20.58823529411765,
      "grad_norm": 0.885619580745697,
      "learning_rate": 2.9877258050403212e-05,
      "loss": 0.0885,
      "num_input_tokens_seen": 4248640,
      "step": 175
    },
    {
      "epoch": 21.176470588235293,
      "grad_norm": 0.6304395794868469,
      "learning_rate": 2.8910861626005776e-05,
      "loss": 0.0865,
      "num_input_tokens_seen": 4369872,
      "step": 180
    },
    {
      "epoch": 21.764705882352942,
      "grad_norm": 0.7621514797210693,
      "learning_rate": 2.7938434936445945e-05,
      "loss": 0.0708,
      "num_input_tokens_seen": 4490688,
      "step": 185
    },
    {
      "epoch": 22.352941176470587,
      "grad_norm": 0.8263904452323914,
      "learning_rate": 2.6961477393196126e-05,
      "loss": 0.0715,
      "num_input_tokens_seen": 4612144,
      "step": 190
    },
    {
      "epoch": 22.941176470588236,
      "grad_norm": 0.5912930965423584,
      "learning_rate": 2.598149539397672e-05,
      "loss": 0.0584,
      "num_input_tokens_seen": 4733632,
      "step": 195
    },
    {
      "epoch": 23.529411764705884,
      "grad_norm": 0.6392534971237183,
      "learning_rate": 2.5e-05,
      "loss": 0.0517,
      "num_input_tokens_seen": 4854048,
      "step": 200
    }
  ],
  "logging_steps": 5,
  "max_steps": 400,
  "num_input_tokens_seen": 4854048,
  "num_train_epochs": 50,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.807606338473165e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|