Femboyuwu2000 commited on
Commit
b27a06f
1 Parent(s): 3071398

Training in progress, step 3320, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf840ad1b562e045bc92ac31e01aee8f4cd67d1a73eca73333298edfbae1ad20
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cea960f50a7271a5b2bba71996a92c7510add6435cf258a5c6606f9afdf5a9f
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b115b07f9fe38375bcc96df9244e270a0061e3396e94a2fc540b35e81b2080b
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2d9cb9f3e787a8028fa8d1eca6622c7c4dfbba28974558a4fd13d01082cfc82
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:709dbfd8db569dd59d549977ea4d372bd8a3fee22ca873f7f07f76ee57da1c03
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:715afa7fa80b309ab5f3a3ffeab1c984abb1f23fc1fdeeb9a5d9e680a111f9d9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d10d973e668b5505b78d845d8bcd5008b3bbe632800fe01e15ed61bbadd63be9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3845b027cbf32ba8978367629ed9d5fe41b951f8e9b22618a87d5fbc8d6666
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.24,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1057,6 +1057,118 @@
1057
  "learning_rate": 2.8881318444640564e-05,
1058
  "loss": 3.5043,
1059
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1060
  }
1061
  ],
1062
  "logging_steps": 20,
@@ -1064,7 +1176,7 @@
1064
  "num_input_tokens_seen": 0,
1065
  "num_train_epochs": 2,
1066
  "save_steps": 20,
1067
- "total_flos": 7092703910756352.0,
1068
  "train_batch_size": 8,
1069
  "trial_name": null,
1070
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2656,
5
  "eval_steps": 500,
6
+ "global_step": 3320,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1057
  "learning_rate": 2.8881318444640564e-05,
1058
  "loss": 3.5043,
1059
  "step": 3000
1060
+ },
1061
+ {
1062
+ "epoch": 0.24,
1063
+ "grad_norm": 26.047521591186523,
1064
+ "learning_rate": 2.8862836261972873e-05,
1065
+ "loss": 3.6236,
1066
+ "step": 3020
1067
+ },
1068
+ {
1069
+ "epoch": 0.24,
1070
+ "grad_norm": 39.91059494018555,
1071
+ "learning_rate": 2.8844208664712577e-05,
1072
+ "loss": 3.4851,
1073
+ "step": 3040
1074
+ },
1075
+ {
1076
+ "epoch": 0.24,
1077
+ "grad_norm": 57.92033004760742,
1078
+ "learning_rate": 2.882543584825435e-05,
1079
+ "loss": 3.5578,
1080
+ "step": 3060
1081
+ },
1082
+ {
1083
+ "epoch": 0.25,
1084
+ "grad_norm": 99.40699005126953,
1085
+ "learning_rate": 2.880651800951616e-05,
1086
+ "loss": 3.577,
1087
+ "step": 3080
1088
+ },
1089
+ {
1090
+ "epoch": 0.25,
1091
+ "grad_norm": 30.7738094329834,
1092
+ "learning_rate": 2.8787455346937182e-05,
1093
+ "loss": 3.5683,
1094
+ "step": 3100
1095
+ },
1096
+ {
1097
+ "epoch": 0.25,
1098
+ "grad_norm": 55.61745834350586,
1099
+ "learning_rate": 2.876824806047573e-05,
1100
+ "loss": 3.4959,
1101
+ "step": 3120
1102
+ },
1103
+ {
1104
+ "epoch": 0.25,
1105
+ "grad_norm": 23.431480407714844,
1106
+ "learning_rate": 2.8748896351607145e-05,
1107
+ "loss": 3.5882,
1108
+ "step": 3140
1109
+ },
1110
+ {
1111
+ "epoch": 0.25,
1112
+ "grad_norm": 20.944149017333984,
1113
+ "learning_rate": 2.8729400423321693e-05,
1114
+ "loss": 3.6096,
1115
+ "step": 3160
1116
+ },
1117
+ {
1118
+ "epoch": 0.25,
1119
+ "grad_norm": 46.77301788330078,
1120
+ "learning_rate": 2.8709760480122443e-05,
1121
+ "loss": 3.5665,
1122
+ "step": 3180
1123
+ },
1124
+ {
1125
+ "epoch": 0.26,
1126
+ "grad_norm": 36.645328521728516,
1127
+ "learning_rate": 2.8689976728023103e-05,
1128
+ "loss": 3.5087,
1129
+ "step": 3200
1130
+ },
1131
+ {
1132
+ "epoch": 0.26,
1133
+ "grad_norm": 32.61063003540039,
1134
+ "learning_rate": 2.8670049374545873e-05,
1135
+ "loss": 3.5054,
1136
+ "step": 3220
1137
+ },
1138
+ {
1139
+ "epoch": 0.26,
1140
+ "grad_norm": 47.47910690307617,
1141
+ "learning_rate": 2.8649978628719256e-05,
1142
+ "loss": 3.5674,
1143
+ "step": 3240
1144
+ },
1145
+ {
1146
+ "epoch": 0.26,
1147
+ "grad_norm": 40.817115783691406,
1148
+ "learning_rate": 2.8629764701075885e-05,
1149
+ "loss": 3.4504,
1150
+ "step": 3260
1151
+ },
1152
+ {
1153
+ "epoch": 0.26,
1154
+ "grad_norm": 32.290626525878906,
1155
+ "learning_rate": 2.8609407803650295e-05,
1156
+ "loss": 3.4699,
1157
+ "step": 3280
1158
+ },
1159
+ {
1160
+ "epoch": 0.26,
1161
+ "grad_norm": 26.942697525024414,
1162
+ "learning_rate": 2.8588908149976702e-05,
1163
+ "loss": 3.5642,
1164
+ "step": 3300
1165
+ },
1166
+ {
1167
+ "epoch": 0.27,
1168
+ "grad_norm": 30.925710678100586,
1169
+ "learning_rate": 2.856826595508678e-05,
1170
+ "loss": 3.6097,
1171
+ "step": 3320
1172
  }
1173
  ],
1174
  "logging_steps": 20,
 
1176
  "num_input_tokens_seen": 0,
1177
  "num_train_epochs": 2,
1178
  "save_steps": 20,
1179
+ "total_flos": 7875682626207744.0,
1180
  "train_batch_size": 8,
1181
  "trial_name": null,
1182
  "trial_params": null