Nexspear committed on
Commit
04e765f
·
verified ·
1 Parent(s): 5f22b2c

Training in progress, step 462, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e28db0f053f0469288d9c661a2de0e7721acd04f65d12b2744dda53823792f55
3
  size 100966336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3457743154bf487a605b17ae1ca8ffa677506a228d2de5556e8a58ccfe3021f
3
  size 100966336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e52fae042af788b20ffbe20c3dffe04e516add806bf2e66c036ba1becc61501
3
  size 51613668
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:850e3398e2d7223a85dd20ee485345b8d225137582b8621c64cd7348a66254fc
3
  size 51613668
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79e5ef6a566f54a2b32cb3e8d5a68a7370551bdcc68ea3fc820d6c009103c0c9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e44f8551f3c2c2d0e97d369427c4b2fc820d540a88b323a1fdeba466dacfd58
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7641dde43bc7a22d17d22ddcaa29ef3541065d43d71357b77f45ce61017cfec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9a75ab01cc15879cd61ff8e586fb370a9b8a51bf7b319e44e27d87274e2e703
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06291899179806,
5
  "eval_steps": 42,
6
- "global_step": 420,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1075,6 +1075,112 @@
1075
  "eval_samples_per_second": 35.94,
1076
  "eval_steps_per_second": 4.494,
1077
  "step": 420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1078
  }
1079
  ],
1080
  "logging_steps": 3,
@@ -1094,7 +1200,7 @@
1094
  "attributes": {}
1095
  }
1096
  },
1097
- "total_flos": 1.3537807518872371e+17,
1098
  "train_batch_size": 8,
1099
  "trial_name": null,
1100
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.069210890977866,
5
  "eval_steps": 42,
6
+ "global_step": 462,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1075
  "eval_samples_per_second": 35.94,
1076
  "eval_steps_per_second": 4.494,
1077
  "step": 420
1078
+ },
1079
+ {
1080
+ "epoch": 0.06336841316804614,
1081
+ "grad_norm": 0.1810549646615982,
1082
+ "learning_rate": 2.98511170358155e-06,
1083
+ "loss": 1.0827,
1084
+ "step": 423
1085
+ },
1086
+ {
1087
+ "epoch": 0.06381783453803229,
1088
+ "grad_norm": 0.1720385104417801,
1089
+ "learning_rate": 2.7613352995397078e-06,
1090
+ "loss": 1.1188,
1091
+ "step": 426
1092
+ },
1093
+ {
1094
+ "epoch": 0.06426725590801842,
1095
+ "grad_norm": 0.1696414053440094,
1096
+ "learning_rate": 2.545785969664524e-06,
1097
+ "loss": 1.0283,
1098
+ "step": 429
1099
+ },
1100
+ {
1101
+ "epoch": 0.06471667727800456,
1102
+ "grad_norm": 0.1651046723127365,
1103
+ "learning_rate": 2.338543455269046e-06,
1104
+ "loss": 1.1545,
1105
+ "step": 432
1106
+ },
1107
+ {
1108
+ "epoch": 0.06516609864799071,
1109
+ "grad_norm": 0.1733168065547943,
1110
+ "learning_rate": 2.1396844246046903e-06,
1111
+ "loss": 1.1155,
1112
+ "step": 435
1113
+ },
1114
+ {
1115
+ "epoch": 0.06561552001797685,
1116
+ "grad_norm": 0.1659458726644516,
1117
+ "learning_rate": 1.949282444498238e-06,
1118
+ "loss": 1.1315,
1119
+ "step": 438
1120
+ },
1121
+ {
1122
+ "epoch": 0.066064941387963,
1123
+ "grad_norm": 0.16362150013446808,
1124
+ "learning_rate": 1.767407953136202e-06,
1125
+ "loss": 1.132,
1126
+ "step": 441
1127
+ },
1128
+ {
1129
+ "epoch": 0.06651436275794914,
1130
+ "grad_norm": 0.162877157330513,
1131
+ "learning_rate": 1.59412823400657e-06,
1132
+ "loss": 1.0462,
1133
+ "step": 444
1134
+ },
1135
+ {
1136
+ "epoch": 0.06696378412793529,
1137
+ "grad_norm": 0.1876746267080307,
1138
+ "learning_rate": 1.4295073910076757e-06,
1139
+ "loss": 1.03,
1140
+ "step": 447
1141
+ },
1142
+ {
1143
+ "epoch": 0.06741320549792143,
1144
+ "grad_norm": 0.18567495048046112,
1145
+ "learning_rate": 1.273606324733284e-06,
1146
+ "loss": 1.0879,
1147
+ "step": 450
1148
+ },
1149
+ {
1150
+ "epoch": 0.06786262686790757,
1151
+ "grad_norm": 0.2132405787706375,
1152
+ "learning_rate": 1.1264827099427417e-06,
1153
+ "loss": 1.0568,
1154
+ "step": 453
1155
+ },
1156
+ {
1157
+ "epoch": 0.0683120482378937,
1158
+ "grad_norm": 0.14907406270503998,
1159
+ "learning_rate": 9.881909742245177e-07,
1160
+ "loss": 1.0681,
1161
+ "step": 456
1162
+ },
1163
+ {
1164
+ "epoch": 0.06876146960787985,
1165
+ "grad_norm": 0.1873130202293396,
1166
+ "learning_rate": 8.587822778610283e-07,
1167
+ "loss": 1.0831,
1168
+ "step": 459
1169
+ },
1170
+ {
1171
+ "epoch": 0.069210890977866,
1172
+ "grad_norm": 0.18441098928451538,
1173
+ "learning_rate": 7.383044949021339e-07,
1174
+ "loss": 1.1253,
1175
+ "step": 462
1176
+ },
1177
+ {
1178
+ "epoch": 0.069210890977866,
1179
+ "eval_loss": 1.0889204740524292,
1180
+ "eval_runtime": 312.7411,
1181
+ "eval_samples_per_second": 35.95,
1182
+ "eval_steps_per_second": 4.496,
1183
+ "step": 462
1184
  }
1185
  ],
1186
  "logging_steps": 3,
 
1200
  "attributes": {}
1201
  }
1202
  },
1203
+ "total_flos": 1.489210915600466e+17,
1204
  "train_batch_size": 8,
1205
  "trial_name": null,
1206
  "trial_params": null