Femboyuwu2000 committed on
Commit
c487d00
1 Parent(s): 9723ca9

Training in progress, step 6080, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26556f1abb53ee822dd03ecead2e694e90d90f9f7c06255c3fb3bb5bd95b1e95
3
  size 13982248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa290a324b0413b280d2bace84c9e16ff480cfdf57669d0e4aa091cc96a7f3a
3
  size 13982248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5267791793a613d797a09d60c3e5961f54bab0e1981820004fbccdcc8050fd4a
3
  size 7062522
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:155d752646efe8e0f6b33728b9a2153eb61f59385cfa4bc37a58d1e634a84306
3
  size 7062522
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27c1b58129752e66a4a302f63e7677b4460963afc004711e8b13996046bcd2c8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1814d7b39176db5919fcc37c67d779f76326615526c5fa198e939a90b9adff4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c45e18651ea77eb43c3817495dab8ccf485a98bc6d621a0b971d270fe188b5ec
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:541ffa1c9c0ef80e1cac33b6e46c86e1b24666070b4e0e70f8ae6ca13ce53e48
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.4816,
5
  "eval_steps": 500,
6
- "global_step": 6020,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2114,6 +2114,27 @@
2114
  "learning_rate": 2.4583801313868417e-05,
2115
  "loss": 3.4601,
2116
  "step": 6020
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2117
  }
2118
  ],
2119
  "logging_steps": 20,
@@ -2121,7 +2142,7 @@
2121
  "num_input_tokens_seen": 0,
2122
  "num_train_epochs": 2,
2123
  "save_steps": 20,
2124
- "total_flos": 1.4256891905015808e+16,
2125
  "train_batch_size": 8,
2126
  "trial_name": null,
2127
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4864,
5
  "eval_steps": 500,
6
+ "global_step": 6080,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2114
  "learning_rate": 2.4583801313868417e-05,
2115
  "loss": 3.4601,
2116
  "step": 6020
2117
+ },
2118
+ {
2119
+ "epoch": 0.48,
2120
+ "grad_norm": 36.80402374267578,
2121
+ "learning_rate": 2.4546378803728922e-05,
2122
+ "loss": 3.5053,
2123
+ "step": 6040
2124
+ },
2125
+ {
2126
+ "epoch": 0.48,
2127
+ "grad_norm": 25.655963897705078,
2128
+ "learning_rate": 2.450885615659305e-05,
2129
+ "loss": 3.4791,
2130
+ "step": 6060
2131
+ },
2132
+ {
2133
+ "epoch": 0.49,
2134
+ "grad_norm": 47.66796112060547,
2135
+ "learning_rate": 2.447123376605561e-05,
2136
+ "loss": 3.4535,
2137
+ "step": 6080
2138
  }
2139
  ],
2140
  "logging_steps": 20,
 
2142
  "num_input_tokens_seen": 0,
2143
  "num_train_epochs": 2,
2144
  "save_steps": 20,
2145
+ "total_flos": 1.4386015976914944e+16,
2146
  "train_batch_size": 8,
2147
  "trial_name": null,
2148
  "trial_params": null