AlekseyKorshuk commited on
Commit
daf6441
1 Parent(s): 0020f82

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +160 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 2.4849175347222223,
4
+ "train_runtime": 11422.0717,
5
+ "train_samples": 286,
6
+ "train_samples_per_second": 0.025,
7
+ "train_steps_per_second": 0.001
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 2.4849175347222223,
4
+ "train_runtime": 11422.0717,
5
+ "train_samples": 286,
6
+ "train_samples_per_second": 0.025,
7
+ "train_steps_per_second": 0.001
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 9,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.11,
12
+ "learning_rate": 6e-06,
13
+ "loss": 2.4875,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.11,
18
+ "eval_accuracy": 0.3397051914493775,
19
+ "eval_loss": 2.505859375,
20
+ "eval_runtime": 5.7507,
21
+ "eval_samples_per_second": 12.52,
22
+ "eval_steps_per_second": 0.522,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.22,
27
+ "learning_rate": 6e-06,
28
+ "loss": 2.5339,
29
+ "step": 2
30
+ },
31
+ {
32
+ "epoch": 0.22,
33
+ "eval_accuracy": 0.3397051914493775,
34
+ "eval_loss": 2.505859375,
35
+ "eval_runtime": 6.1033,
36
+ "eval_samples_per_second": 11.797,
37
+ "eval_steps_per_second": 0.492,
38
+ "step": 2
39
+ },
40
+ {
41
+ "epoch": 0.33,
42
+ "learning_rate": 6e-06,
43
+ "loss": 2.5161,
44
+ "step": 3
45
+ },
46
+ {
47
+ "epoch": 0.33,
48
+ "eval_accuracy": 0.3397051914493775,
49
+ "eval_loss": 2.505859375,
50
+ "eval_runtime": 5.249,
51
+ "eval_samples_per_second": 13.717,
52
+ "eval_steps_per_second": 0.572,
53
+ "step": 3
54
+ },
55
+ {
56
+ "epoch": 0.44,
57
+ "learning_rate": 6e-06,
58
+ "loss": 2.4524,
59
+ "step": 4
60
+ },
61
+ {
62
+ "epoch": 0.44,
63
+ "eval_accuracy": 0.3397051914493775,
64
+ "eval_loss": 2.505859375,
65
+ "eval_runtime": 5.2566,
66
+ "eval_samples_per_second": 13.697,
67
+ "eval_steps_per_second": 0.571,
68
+ "step": 4
69
+ },
70
+ {
71
+ "epoch": 0.56,
72
+ "learning_rate": 6e-06,
73
+ "loss": 2.554,
74
+ "step": 5
75
+ },
76
+ {
77
+ "epoch": 0.56,
78
+ "eval_accuracy": 0.34155508574113225,
79
+ "eval_loss": 2.478515625,
80
+ "eval_runtime": 6.2098,
81
+ "eval_samples_per_second": 11.595,
82
+ "eval_steps_per_second": 0.483,
83
+ "step": 5
84
+ },
85
+ {
86
+ "epoch": 0.67,
87
+ "learning_rate": 6e-06,
88
+ "loss": 2.4678,
89
+ "step": 6
90
+ },
91
+ {
92
+ "epoch": 0.67,
93
+ "eval_accuracy": 0.34155508574113225,
94
+ "eval_loss": 2.478515625,
95
+ "eval_runtime": 6.0227,
96
+ "eval_samples_per_second": 11.955,
97
+ "eval_steps_per_second": 0.498,
98
+ "step": 6
99
+ },
100
+ {
101
+ "epoch": 0.78,
102
+ "learning_rate": 6e-06,
103
+ "loss": 2.4836,
104
+ "step": 7
105
+ },
106
+ {
107
+ "epoch": 0.78,
108
+ "eval_accuracy": 0.34584214235377025,
109
+ "eval_loss": 2.447265625,
110
+ "eval_runtime": 5.1937,
111
+ "eval_samples_per_second": 13.863,
112
+ "eval_steps_per_second": 0.578,
113
+ "step": 7
114
+ },
115
+ {
116
+ "epoch": 0.89,
117
+ "learning_rate": 6e-06,
118
+ "loss": 2.4138,
119
+ "step": 8
120
+ },
121
+ {
122
+ "epoch": 0.89,
123
+ "eval_accuracy": 0.34728094902513507,
124
+ "eval_loss": 2.4296875,
125
+ "eval_runtime": 5.8071,
126
+ "eval_samples_per_second": 12.399,
127
+ "eval_steps_per_second": 0.517,
128
+ "step": 8
129
+ },
130
+ {
131
+ "epoch": 1.0,
132
+ "learning_rate": 6e-06,
133
+ "loss": 2.4551,
134
+ "step": 9
135
+ },
136
+ {
137
+ "epoch": 1.0,
138
+ "eval_accuracy": 0.348661028893587,
139
+ "eval_loss": 2.412109375,
140
+ "eval_runtime": 6.21,
141
+ "eval_samples_per_second": 11.594,
142
+ "eval_steps_per_second": 0.483,
143
+ "step": 9
144
+ },
145
+ {
146
+ "epoch": 1.0,
147
+ "step": 9,
148
+ "total_flos": 1309654646784.0,
149
+ "train_loss": 2.4849175347222223,
150
+ "train_runtime": 11422.0717,
151
+ "train_samples_per_second": 0.025,
152
+ "train_steps_per_second": 0.001
153
+ }
154
+ ],
155
+ "max_steps": 9,
156
+ "num_train_epochs": 1,
157
+ "total_flos": 1309654646784.0,
158
+ "trial_name": null,
159
+ "trial_params": null
160
+ }