farmery committed on
Commit
7fa8e0b
·
verified ·
1 Parent(s): 7455acf

Training in progress, step 462, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73933fec6f39ff31db8b4063588eb471127f6937633d45d1c3cc37e646eb1f1a
3
  size 251748704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37cf3369fc4b4dddec6b9e317be776ded9712a8b15466e0d98ba0bf3e66afe57
3
  size 251748704
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c5760d20545fd2a01ca6f9f4c34b708052b116e7c07230cf2bf22840626e187
3
  size 128585300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83bd32947a229874ca5ba692cc6fb122702696c30165573bcc776cd8f51d33ec
3
  size 128585300
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e2e8252e83bd5cb5ab2bae1256afc20758f78bcd5fbe5ea0d60fb2a141e1a7d
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad549467aee21b1d7841355db396e8f80727b2cce0c91d316fca55cfd3f145e
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:351eb17693dc7c368b46ffc4b9a63a06cd04709ba8be0c043bb013a87b50a082
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aabdae8fa17d1f82073d1f2d4117f3b2c974d593012d8b40bf05683705e1d30
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35311861be4c0bd5f63e4e6b59540c760ecd18629f1a0c00f03730317db34397
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89849a77ec6561cbbf56f2e2c9c58d90b30d57d1de75d3864248f6cbf0735630
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce723a195946d3a4ab64decb27f015fa2225e0bf59a73b5ef3fc3b70f932fbcc
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdb8f124b2e4b7af08f331fbd2f11e354c981be3f4ab87e1dc330486d9d9d31d
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7641dde43bc7a22d17d22ddcaa29ef3541065d43d71357b77f45ce61017cfec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9a75ab01cc15879cd61ff8e586fb370a9b8a51bf7b319e44e27d87274e2e703
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4788732394366197,
5
  "eval_steps": 42,
6
- "global_step": 420,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1075,6 +1075,112 @@
1075
  "eval_samples_per_second": 57.06,
1076
  "eval_steps_per_second": 1.79,
1077
  "step": 420
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1078
  }
1079
  ],
1080
  "logging_steps": 3,
@@ -1094,7 +1200,7 @@
1094
  "attributes": {}
1095
  }
1096
  },
1097
- "total_flos": 3.5221252114009293e+18,
1098
  "train_batch_size": 8,
1099
  "trial_name": null,
1100
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.6267605633802817,
5
  "eval_steps": 42,
6
+ "global_step": 462,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1075
  "eval_samples_per_second": 57.06,
1076
  "eval_steps_per_second": 1.79,
1077
  "step": 420
1078
+ },
1079
+ {
1080
+ "epoch": 1.48943661971831,
1081
+ "grad_norm": 1.5658754110336304,
1082
+ "learning_rate": 2.98511170358155e-06,
1083
+ "loss": 0.452,
1084
+ "step": 423
1085
+ },
1086
+ {
1087
+ "epoch": 1.5,
1088
+ "grad_norm": 1.5586401224136353,
1089
+ "learning_rate": 2.7613352995397078e-06,
1090
+ "loss": 0.4307,
1091
+ "step": 426
1092
+ },
1093
+ {
1094
+ "epoch": 1.51056338028169,
1095
+ "grad_norm": 1.8097403049468994,
1096
+ "learning_rate": 2.545785969664524e-06,
1097
+ "loss": 0.4415,
1098
+ "step": 429
1099
+ },
1100
+ {
1101
+ "epoch": 1.5211267605633803,
1102
+ "grad_norm": 2.813575267791748,
1103
+ "learning_rate": 2.338543455269046e-06,
1104
+ "loss": 0.4346,
1105
+ "step": 432
1106
+ },
1107
+ {
1108
+ "epoch": 1.5316901408450705,
1109
+ "grad_norm": 1.5137277841567993,
1110
+ "learning_rate": 2.1396844246046903e-06,
1111
+ "loss": 0.4203,
1112
+ "step": 435
1113
+ },
1114
+ {
1115
+ "epoch": 1.5422535211267605,
1116
+ "grad_norm": 1.5360363721847534,
1117
+ "learning_rate": 1.949282444498238e-06,
1118
+ "loss": 0.4264,
1119
+ "step": 438
1120
+ },
1121
+ {
1122
+ "epoch": 1.5528169014084507,
1123
+ "grad_norm": 1.4588457345962524,
1124
+ "learning_rate": 1.767407953136202e-06,
1125
+ "loss": 0.4363,
1126
+ "step": 441
1127
+ },
1128
+ {
1129
+ "epoch": 1.563380281690141,
1130
+ "grad_norm": 1.5039774179458618,
1131
+ "learning_rate": 1.59412823400657e-06,
1132
+ "loss": 0.4399,
1133
+ "step": 444
1134
+ },
1135
+ {
1136
+ "epoch": 1.573943661971831,
1137
+ "grad_norm": 1.6695863008499146,
1138
+ "learning_rate": 1.4295073910076757e-06,
1139
+ "loss": 0.4678,
1140
+ "step": 447
1141
+ },
1142
+ {
1143
+ "epoch": 1.584507042253521,
1144
+ "grad_norm": 1.5582469701766968,
1145
+ "learning_rate": 1.273606324733284e-06,
1146
+ "loss": 0.4264,
1147
+ "step": 450
1148
+ },
1149
+ {
1150
+ "epoch": 1.5950704225352113,
1151
+ "grad_norm": 1.6020346879959106,
1152
+ "learning_rate": 1.1264827099427417e-06,
1153
+ "loss": 0.4423,
1154
+ "step": 453
1155
+ },
1156
+ {
1157
+ "epoch": 1.6056338028169015,
1158
+ "grad_norm": 1.8120399713516235,
1159
+ "learning_rate": 9.881909742245177e-07,
1160
+ "loss": 0.4793,
1161
+ "step": 456
1162
+ },
1163
+ {
1164
+ "epoch": 1.6161971830985915,
1165
+ "grad_norm": 1.804922342300415,
1166
+ "learning_rate": 8.587822778610283e-07,
1167
+ "loss": 0.4396,
1168
+ "step": 459
1169
+ },
1170
+ {
1171
+ "epoch": 1.6267605633802817,
1172
+ "grad_norm": 1.4649447202682495,
1173
+ "learning_rate": 7.383044949021339e-07,
1174
+ "loss": 0.4505,
1175
+ "step": 462
1176
+ },
1177
+ {
1178
+ "epoch": 1.6267605633802817,
1179
+ "eval_loss": 0.12798862159252167,
1180
+ "eval_runtime": 33.5293,
1181
+ "eval_samples_per_second": 57.055,
1182
+ "eval_steps_per_second": 1.789,
1183
+ "step": 462
1184
  }
1185
  ],
1186
  "logging_steps": 3,
 
1200
  "attributes": {}
1201
  }
1202
  },
1203
+ "total_flos": 3.874337732541022e+18,
1204
  "train_batch_size": 8,
1205
  "trial_name": null,
1206
  "trial_params": null