abhinavp committed on
Commit
c922b2a
1 Parent(s): 765cc91

Training in progress, step 120, checkpoint

Browse files
checkpoint-120/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa88b9923d32000ee2b4cdfc9e8d5b21092a959dfc406c3968dcb5b73db420a9
3
  size 544373707
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dfa054188dae5878f0eecb98d946f0808413058d25d9c28cf47f971f9ebf834
3
  size 544373707
checkpoint-120/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c7ee9f68ca0b9288e0b84b23d77fb474c898c21f60d253a0bbf4b2169d57cba
3
  size 272184705
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cafb6543dd602e9af54f6677d9014ba0b3e5c563f52768aeccb2fbd2fa44915
3
  size 272184705
checkpoint-120/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 7.832551956176758,
3
  "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/fict-full-lstm-42/checkpoints/checkpoint-120",
4
  "epoch": 0.96,
5
  "eval_steps": 10,
@@ -11,169 +11,169 @@
11
  {
12
  "epoch": 0.08,
13
  "learning_rate": 4.600000000000001e-05,
14
- "loss": 10.8138,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.08,
19
- "eval_loss": 10.80423355102539,
20
- "eval_runtime": 2.8085,
21
- "eval_samples_per_second": 356.059,
22
- "eval_steps_per_second": 44.507,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.16,
27
  "learning_rate": 4.2e-05,
28
- "loss": 10.7946,
29
  "step": 20
30
  },
31
  {
32
  "epoch": 0.16,
33
- "eval_loss": 10.781017303466797,
34
- "eval_runtime": 2.8659,
35
- "eval_samples_per_second": 348.927,
36
- "eval_steps_per_second": 43.616,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.24,
41
  "learning_rate": 3.8e-05,
42
- "loss": 10.7603,
43
  "step": 30
44
  },
45
  {
46
  "epoch": 0.24,
47
- "eval_loss": 10.720499992370605,
48
- "eval_runtime": 2.7648,
49
- "eval_samples_per_second": 361.685,
50
- "eval_steps_per_second": 45.211,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.32,
55
  "learning_rate": 3.4000000000000007e-05,
56
- "loss": 10.6159,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.32,
61
- "eval_loss": 10.284521102905273,
62
- "eval_runtime": 2.8068,
63
- "eval_samples_per_second": 356.277,
64
- "eval_steps_per_second": 44.535,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.4,
69
  "learning_rate": 3e-05,
70
- "loss": 9.881,
71
  "step": 50
72
  },
73
  {
74
  "epoch": 0.4,
75
- "eval_loss": 9.241220474243164,
76
- "eval_runtime": 2.8265,
77
- "eval_samples_per_second": 353.795,
78
- "eval_steps_per_second": 44.224,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.48,
83
  "learning_rate": 2.6000000000000002e-05,
84
- "loss": 9.0551,
85
  "step": 60
86
  },
87
  {
88
  "epoch": 0.48,
89
- "eval_loss": 8.63198471069336,
90
- "eval_runtime": 2.913,
91
- "eval_samples_per_second": 343.289,
92
- "eval_steps_per_second": 42.911,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.56,
97
  "learning_rate": 2.2000000000000003e-05,
98
- "loss": 8.5603,
99
  "step": 70
100
  },
101
  {
102
  "epoch": 0.56,
103
- "eval_loss": 8.300483703613281,
104
- "eval_runtime": 2.8122,
105
- "eval_samples_per_second": 355.598,
106
- "eval_steps_per_second": 44.45,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.64,
111
  "learning_rate": 1.8e-05,
112
- "loss": 8.3761,
113
  "step": 80
114
  },
115
  {
116
  "epoch": 0.64,
117
- "eval_loss": 8.105779647827148,
118
- "eval_runtime": 2.8076,
119
- "eval_samples_per_second": 356.172,
120
- "eval_steps_per_second": 44.521,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.72,
125
  "learning_rate": 1.4000000000000001e-05,
126
- "loss": 8.2158,
127
  "step": 90
128
  },
129
  {
130
  "epoch": 0.72,
131
- "eval_loss": 7.982827663421631,
132
- "eval_runtime": 2.8051,
133
- "eval_samples_per_second": 356.497,
134
- "eval_steps_per_second": 44.562,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.8,
139
  "learning_rate": 1e-05,
140
- "loss": 8.0922,
141
  "step": 100
142
  },
143
  {
144
  "epoch": 0.8,
145
- "eval_loss": 7.904599189758301,
146
- "eval_runtime": 2.8231,
147
- "eval_samples_per_second": 354.225,
148
- "eval_steps_per_second": 44.278,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.88,
153
  "learning_rate": 6e-06,
154
- "loss": 8.0326,
155
  "step": 110
156
  },
157
  {
158
  "epoch": 0.88,
159
- "eval_loss": 7.856319427490234,
160
- "eval_runtime": 2.8253,
161
- "eval_samples_per_second": 353.939,
162
- "eval_steps_per_second": 44.242,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.96,
167
  "learning_rate": 2.0000000000000003e-06,
168
- "loss": 8.1135,
169
  "step": 120
170
  },
171
  {
172
  "epoch": 0.96,
173
- "eval_loss": 7.832551956176758,
174
- "eval_runtime": 2.8376,
175
- "eval_samples_per_second": 352.411,
176
- "eval_steps_per_second": 44.051,
177
  "step": 120
178
  }
179
  ],
 
1
  {
2
+ "best_metric": 7.84664249420166,
3
  "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/fict-full-lstm-42/checkpoints/checkpoint-120",
4
  "epoch": 0.96,
5
  "eval_steps": 10,
 
11
  {
12
  "epoch": 0.08,
13
  "learning_rate": 4.600000000000001e-05,
14
+ "loss": 10.8142,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.08,
19
+ "eval_loss": 10.805203437805176,
20
+ "eval_runtime": 2.7628,
21
+ "eval_samples_per_second": 361.95,
22
+ "eval_steps_per_second": 45.244,
23
  "step": 10
24
  },
25
  {
26
  "epoch": 0.16,
27
  "learning_rate": 4.2e-05,
28
+ "loss": 10.7959,
29
  "step": 20
30
  },
31
  {
32
  "epoch": 0.16,
33
+ "eval_loss": 10.783019065856934,
34
+ "eval_runtime": 2.9141,
35
+ "eval_samples_per_second": 343.159,
36
+ "eval_steps_per_second": 42.895,
37
  "step": 20
38
  },
39
  {
40
  "epoch": 0.24,
41
  "learning_rate": 3.8e-05,
42
+ "loss": 10.7637,
43
  "step": 30
44
  },
45
  {
46
  "epoch": 0.24,
47
+ "eval_loss": 10.725115776062012,
48
+ "eval_runtime": 2.7934,
49
+ "eval_samples_per_second": 357.988,
50
+ "eval_steps_per_second": 44.748,
51
  "step": 30
52
  },
53
  {
54
  "epoch": 0.32,
55
  "learning_rate": 3.4000000000000007e-05,
56
+ "loss": 10.62,
57
  "step": 40
58
  },
59
  {
60
  "epoch": 0.32,
61
+ "eval_loss": 10.306303977966309,
62
+ "eval_runtime": 2.7909,
63
+ "eval_samples_per_second": 358.31,
64
+ "eval_steps_per_second": 44.789,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 0.4,
69
  "learning_rate": 3e-05,
70
+ "loss": 9.8971,
71
  "step": 50
72
  },
73
  {
74
  "epoch": 0.4,
75
+ "eval_loss": 9.273224830627441,
76
+ "eval_runtime": 2.8125,
77
+ "eval_samples_per_second": 355.55,
78
+ "eval_steps_per_second": 44.444,
79
  "step": 50
80
  },
81
  {
82
  "epoch": 0.48,
83
  "learning_rate": 2.6000000000000002e-05,
84
+ "loss": 9.0782,
85
  "step": 60
86
  },
87
  {
88
  "epoch": 0.48,
89
+ "eval_loss": 8.667804718017578,
90
+ "eval_runtime": 2.8983,
91
+ "eval_samples_per_second": 345.031,
92
+ "eval_steps_per_second": 43.129,
93
  "step": 60
94
  },
95
  {
96
  "epoch": 0.56,
97
  "learning_rate": 2.2000000000000003e-05,
98
+ "loss": 8.5836,
99
  "step": 70
100
  },
101
  {
102
  "epoch": 0.56,
103
+ "eval_loss": 8.331869125366211,
104
+ "eval_runtime": 2.7654,
105
+ "eval_samples_per_second": 361.611,
106
+ "eval_steps_per_second": 45.201,
107
  "step": 70
108
  },
109
  {
110
  "epoch": 0.64,
111
  "learning_rate": 1.8e-05,
112
+ "loss": 8.4033,
113
  "step": 80
114
  },
115
  {
116
  "epoch": 0.64,
117
+ "eval_loss": 8.129607200622559,
118
+ "eval_runtime": 2.7665,
119
+ "eval_samples_per_second": 361.474,
120
+ "eval_steps_per_second": 45.184,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.72,
125
  "learning_rate": 1.4000000000000001e-05,
126
+ "loss": 8.2454,
127
  "step": 90
128
  },
129
  {
130
  "epoch": 0.72,
131
+ "eval_loss": 8.00214672088623,
132
+ "eval_runtime": 2.7731,
133
+ "eval_samples_per_second": 360.605,
134
+ "eval_steps_per_second": 45.076,
135
  "step": 90
136
  },
137
  {
138
  "epoch": 0.8,
139
  "learning_rate": 1e-05,
140
+ "loss": 8.1265,
141
  "step": 100
142
  },
143
  {
144
  "epoch": 0.8,
145
+ "eval_loss": 7.9211931228637695,
146
+ "eval_runtime": 2.7627,
147
+ "eval_samples_per_second": 361.96,
148
+ "eval_steps_per_second": 45.245,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 0.88,
153
  "learning_rate": 6e-06,
154
+ "loss": 8.0655,
155
  "step": 110
156
  },
157
  {
158
  "epoch": 0.88,
159
+ "eval_loss": 7.871333122253418,
160
+ "eval_runtime": 2.7896,
161
+ "eval_samples_per_second": 358.475,
162
+ "eval_steps_per_second": 44.809,
163
  "step": 110
164
  },
165
  {
166
  "epoch": 0.96,
167
  "learning_rate": 2.0000000000000003e-06,
168
+ "loss": 8.1315,
169
  "step": 120
170
  },
171
  {
172
  "epoch": 0.96,
173
+ "eval_loss": 7.84664249420166,
174
+ "eval_runtime": 2.9029,
175
+ "eval_samples_per_second": 344.487,
176
+ "eval_steps_per_second": 43.061,
177
  "step": 120
178
  }
179
  ],
checkpoint-120/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca94376b560130659aac7fc1141fac166f9e87d222e6f51cda86dacf6c253994
3
  size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baffe53b8f8b613e45dfc80a5db922518a94ed9ef3748800ea2d2e11d33fbc0d
3
  size 4155