{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.04387311894002545,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.002193655947001272,
"grad_norm": 3.5356061458587646,
"learning_rate": 2.997806344052999e-05,
"loss": 0.2274,
"step": 50
},
{
"epoch": 0.004387311894002544,
"grad_norm": 0.4738060534000397,
"learning_rate": 2.9956126881059976e-05,
"loss": 0.0055,
"step": 100
},
{
"epoch": 0.006580967841003817,
"grad_norm": 0.18818098306655884,
"learning_rate": 2.993419032158996e-05,
"loss": 0.0014,
"step": 150
},
{
"epoch": 0.008774623788005089,
"grad_norm": 0.009225570596754551,
"learning_rate": 2.991225376211995e-05,
"loss": 0.0005,
"step": 200
},
{
"epoch": 0.010968279735006362,
"grad_norm": 0.032771218568086624,
"learning_rate": 2.9890317202649936e-05,
"loss": 0.0009,
"step": 250
},
{
"epoch": 0.013161935682007634,
"grad_norm": 0.03574460744857788,
"learning_rate": 2.9868380643179925e-05,
"loss": 0.0005,
"step": 300
},
{
"epoch": 0.015355591629008906,
"grad_norm": 0.0016715412493795156,
"learning_rate": 2.984644408370991e-05,
"loss": 0.0001,
"step": 350
},
{
"epoch": 0.017549247576010178,
"grad_norm": 0.0006590808043256402,
"learning_rate": 2.9824507524239897e-05,
"loss": 0.0001,
"step": 400
},
{
"epoch": 0.01974290352301145,
"grad_norm": 0.00635514734312892,
"learning_rate": 2.9802570964769886e-05,
"loss": 0.0002,
"step": 450
},
{
"epoch": 0.021936559470012724,
"grad_norm": 0.0005727710667997599,
"learning_rate": 2.9780634405299875e-05,
"loss": 0.0001,
"step": 500
},
{
"epoch": 0.024130215417013996,
"grad_norm": 0.0005088996258564293,
"learning_rate": 2.975869784582986e-05,
"loss": 0.0002,
"step": 550
},
{
"epoch": 0.026323871364015268,
"grad_norm": 0.010593525134027004,
"learning_rate": 2.9736761286359847e-05,
"loss": 0.0002,
"step": 600
},
{
"epoch": 0.02851752731101654,
"grad_norm": 0.006351508665829897,
"learning_rate": 2.9714824726889836e-05,
"loss": 0.0001,
"step": 650
},
{
"epoch": 0.03071118325801781,
"grad_norm": 0.0015719968359917402,
"learning_rate": 2.969288816741982e-05,
"loss": 0.0,
"step": 700
},
{
"epoch": 0.03290483920501908,
"grad_norm": 0.0007212675409391522,
"learning_rate": 2.967095160794981e-05,
"loss": 0.0,
"step": 750
},
{
"epoch": 0.035098495152020355,
"grad_norm": 0.00027612957637757063,
"learning_rate": 2.96490150484798e-05,
"loss": 0.0,
"step": 800
},
{
"epoch": 0.03729215109902163,
"grad_norm": 0.0005350236897356808,
"learning_rate": 2.9627078489009782e-05,
"loss": 0.0,
"step": 850
},
{
"epoch": 0.0394858070460229,
"grad_norm": 0.0006030969670973718,
"learning_rate": 2.960514192953977e-05,
"loss": 0.0,
"step": 900
},
{
"epoch": 0.04167946299302418,
"grad_norm": 0.00019211292965337634,
"learning_rate": 2.958320537006976e-05,
"loss": 0.0001,
"step": 950
},
{
"epoch": 0.04387311894002545,
"grad_norm": 0.00020958702953066677,
"learning_rate": 2.9561268810599746e-05,
"loss": 0.0,
"step": 1000
}
],
"logging_steps": 50,
"max_steps": 68379,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 200,
"total_flos": 0.0,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}
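The object above appears to follow the trainer_state.json schema that the Hugging Face transformers Trainer writes next to each checkpoint: log_history holds one record per logging event (every logging_steps = 50 optimizer steps), and the run is at global step 1,000 of 68,379. The snippet below is a minimal sketch, assuming the file is saved as trainer_state.json in the current directory (an assumed path), that loads it with the standard library and prints the logged loss and learning-rate trajectory.

# Minimal sketch: inspect this trainer state with only the standard library.
# The path "trainer_state.json" is an assumption; point it at the actual
# checkpoint directory if the file lives elsewhere.
import json

with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

print(f"global step {state['global_step']} / {state['max_steps']} "
      f"(epoch {state['epoch']:.4f} of {state['num_train_epochs']})")

# Each log_history entry corresponds to one logging event.
for entry in state["log_history"]:
    print(f"step {entry['step']:>5}  "
          f"loss {entry['loss']:.4f}  "
          f"lr {entry['learning_rate']:.3e}  "
          f"grad_norm {entry['grad_norm']:.4f}")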