SummerSigh committed on
Commit
2df8cec
1 Parent(s): 9eacaa8

Upload 8 files

Browse files
Files changed (4) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +44 -4
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f4df0c785b7791245287fe1c5f680e6463fd9e47e7d8a4dbb5ea0c9105256ec
3
  size 18494040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:417693109801c7441187e7add71a3bad8d2637305d3b4bd76ecd18306ef418b8
3
  size 18494040
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:445da9904e4385c935c446ab197148de99346155c2e6a4fe40680c2eab4c8124
3
  size 37035002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6037b31fbc3053fb0bad4fc42595272953fa60e94cfca460ea449291e2a5b05d
3
  size 37035002
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbdd2f29a31ef5687e75985709b03210edb0e25b1f2f17c9a08fb06f7dc22b57
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:334b89593a1e587cb5ba4d78087b7def7081a35b3f61f6ea53a4250201c6477a
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.001,
5
  "eval_steps": 500,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -23,11 +23,51 @@
23
  "loss": 7.7872,
24
  "num_input_tokens_seen": 497906688,
25
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "logging_steps": 500,
29
  "max_steps": 1000000,
30
- "num_input_tokens_seen": 497906688,
31
  "num_train_epochs": 9223372036854775807,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
@@ -42,7 +82,7 @@
42
  "attributes": {}
43
  }
44
  },
45
- "total_flos": 7687997923000320.0,
46
  "train_batch_size": 64,
47
  "trial_name": null,
48
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0035,
5
  "eval_steps": 500,
6
+ "global_step": 3500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
23
  "loss": 7.7872,
24
  "num_input_tokens_seen": 497906688,
25
  "step": 1000
26
+ },
27
+ {
28
+ "epoch": 0.0015,
29
+ "grad_norm": 0.43762627243995667,
30
+ "learning_rate": 5.6483913125652505e-05,
31
+ "loss": 7.0276,
32
+ "num_input_tokens_seen": 746696384,
33
+ "step": 1500
34
+ },
35
+ {
36
+ "epoch": 0.002,
37
+ "grad_norm": 0.47100603580474854,
38
+ "learning_rate": 5.645562874251497e-05,
39
+ "loss": 6.5612,
40
+ "num_input_tokens_seen": 994790144,
41
+ "step": 2000
42
+ },
43
+ {
44
+ "epoch": 0.0025,
45
+ "grad_norm": 0.4795113205909729,
46
+ "learning_rate": 5.642734435937744e-05,
47
+ "loss": 6.2086,
48
+ "num_input_tokens_seen": 1243917376,
49
+ "step": 2500
50
+ },
51
+ {
52
+ "epoch": 0.003,
53
+ "grad_norm": 0.9179026484489441,
54
+ "learning_rate": 5.63990599762399e-05,
55
+ "loss": 5.9393,
56
+ "num_input_tokens_seen": 1492823744,
57
+ "step": 3000
58
+ },
59
+ {
60
+ "epoch": 0.0035,
61
+ "grad_norm": 1.1793241500854492,
62
+ "learning_rate": 5.6370775593102374e-05,
63
+ "loss": 5.728,
64
+ "num_input_tokens_seen": 1742012864,
65
+ "step": 3500
66
  }
67
  ],
68
  "logging_steps": 500,
69
  "max_steps": 1000000,
70
+ "num_input_tokens_seen": 1742012864,
71
  "num_train_epochs": 9223372036854775807,
72
  "save_steps": 500,
73
  "stateful_callbacks": {
 
82
  "attributes": {}
83
  }
84
  },
85
+ "total_flos": 2.689779350839296e+16,
86
  "train_batch_size": 64,
87
  "trial_name": null,
88
  "trial_params": null