penglingwei committed on
Commit 6f01c16
1 Parent(s): 6e46e3f

Training in progress, step 200, checkpoint

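The files below are the standard artifacts the Hugging Face Trainer writes when it saves a checkpoint: model weights, optimizer state, LR scheduler state, RNG state, and trainer state. As a rough sketch only, assuming a Trainer-based script, resuming from such a directory looks like the snippet below; the tiny model and toy dataset are placeholders for illustration, not the actual training setup.

# Minimal sketch, not the author's script: resuming a Hugging Face Trainer
# run from a saved checkpoint directory such as "last-checkpoint".
from torch.utils.data import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

class ToyDataset(Dataset):
    # Tiny stand-in dataset so the sketch is self-contained; not the real data.
    def __init__(self, tokenizer, n=64):
        self.ids = tokenizer("hello world", return_tensors="pt")["input_ids"][0]
        self.n = n
    def __len__(self):
        return self.n
    def __getitem__(self, idx):
        return {"input_ids": self.ids, "labels": self.ids}

tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")  # placeholder model
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

args = TrainingArguments(
    output_dir="out",
    logging_steps=10,   # matches "logging_steps": 10 in trainer_state.json
    save_steps=100,     # a checkpoint every 100 steps, like this commit at step 200
    max_steps=300,
)

trainer = Trainer(model=model, args=args, train_dataset=ToyDataset(tokenizer))

# Restores weights, optimizer, scheduler, RNG state, and trainer state, so
# training continues from the saved global_step instead of starting over.
trainer.train(resume_from_checkpoint="last-checkpoint")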
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a32d2c3a667587764cd9ca5d6d70a240736bf426d6475dac4043133fa73816ee
+oid sha256:0e6466b58c3c120a943d635b9b9f77c78317c742d1be703c5a32a4b57e4ec1c8
 size 355970836
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd98378eb1df249c95152f56a7c980be9183dd9e5ac769e4f33b80952e554f55
+oid sha256:73929ca6199d0cd5698949579f301ee09e22518ce3c78b1528bb2ba92bd5f78f
 size 712036922
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e392bc84f8761c1cb361bf95966bfb38a86990c20193cb6f07afc3e9b98a5dd
+oid sha256:02ac934b04f9ad30a9947051ab49a9c4b8fcfbf28262f73cd84f8b6fe5cdc61a
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aacb2caac6c123a2e842fada3eab2f7441181ca135f4cd4779f74c20a10ad6c1
+oid sha256:e868db493f34528bd828066f7bfb20c4fcf4eb5967587ac96dfd619d77eeab66
 size 1064
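The four diffs above touch Git LFS pointer files, so only the sha256 oid changes (the byte sizes stay the same); the binary payloads live in LFS storage. A small, illustrative check, not part of this repo, that a downloaded file matches its pointer:

import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream the file in chunks and return its sha256 hex digest.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Expected oid taken from the new pointer for last-checkpoint/model.safetensors.
expected = "0e6466b58c3c120a943d635b9b9f77c78317c742d1be703c5a32a4b57e4ec1c8"
assert lfs_sha256("last-checkpoint/model.safetensors") == expected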
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0027131336014813707,
+  "epoch": 0.0054262672029627415,
   "eval_steps": 500,
-  "global_step": 100,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -77,6 +77,76 @@
       "learning_rate": 1.76e-05,
       "loss": 37.2538,
       "step": 100
+    },
+    {
+      "epoch": 0.002984446961629508,
+      "grad_norm": 78771.6875,
+      "learning_rate": 1.9600000000000002e-05,
+      "loss": 36.8398,
+      "step": 110
+    },
+    {
+      "epoch": 0.0032557603217776452,
+      "grad_norm": 4261.8388671875,
+      "learning_rate": 2.16e-05,
+      "loss": 38.1147,
+      "step": 120
+    },
+    {
+      "epoch": 0.0035270736819257823,
+      "grad_norm": 354.98797607421875,
+      "learning_rate": 2.36e-05,
+      "loss": 40.2607,
+      "step": 130
+    },
+    {
+      "epoch": 0.0037983870420739193,
+      "grad_norm": 2892.681640625,
+      "learning_rate": 2.5600000000000002e-05,
+      "loss": 43.2013,
+      "step": 140
+    },
+    {
+      "epoch": 0.004069700402222057,
+      "grad_norm": 1937.0869140625,
+      "learning_rate": 2.7600000000000003e-05,
+      "loss": 36.9207,
+      "step": 150
+    },
+    {
+      "epoch": 0.004341013762370193,
+      "grad_norm": 2947.12939453125,
+      "learning_rate": 2.96e-05,
+      "loss": 37.1433,
+      "step": 160
+    },
+    {
+      "epoch": 0.004612327122518331,
+      "grad_norm": 69275.578125,
+      "learning_rate": 3.16e-05,
+      "loss": 35.2396,
+      "step": 170
+    },
+    {
+      "epoch": 0.004883640482666467,
+      "grad_norm": 113316.1328125,
+      "learning_rate": 3.3400000000000005e-05,
+      "loss": 52.4227,
+      "step": 180
+    },
+    {
+      "epoch": 0.005154953842814605,
+      "grad_norm": 21579.859375,
+      "learning_rate": 3.54e-05,
+      "loss": 44.2034,
+      "step": 190
+    },
+    {
+      "epoch": 0.0054262672029627415,
+      "grad_norm": 23244.189453125,
+      "learning_rate": 3.74e-05,
+      "loss": 48.1572,
+      "step": 200
     }
   ],
   "logging_steps": 10,