adel-cybral commited on
Commit
c398496
1 Parent(s): 6fbe2ee

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:762566100d643c393dc9765c901427a1a7e585f80720544f1d9732e8c2f1a638
3
  size 57029756
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c6efbf6915c0d7fd8fc40574e3c70bbbeed5caba0f2eb7d8bed57661bd17296
3
  size 57029756
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73eb6618c19390aa57b57efc6e80aa55d60ef89276374392558a5141f6563c2a
3
  size 114100410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58dc8495027d948ee4d9ac9cdbf9285489f20ab3b7c701c7afd65e841482e4da
3
  size 114100410
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d172399ef3366064f2426bb341e0c2875d4dcfd2615777d3613f0258a4aaa64
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5202dcf80c7b0f10dc986464ca4f6b0ef0ad4a74acba3af2d863fb43e4dbdcfb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9db35652316cd18818079609bdbd22b09de59e7ca3cd85099fc0bf1dbb6d1001
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0992f3eb6a6e87d7c905716ad6cb10cdc7e7e02476fe12e47e23d9729391391c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,116 +1,19 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 4390,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.5694760820045558,
13
- "grad_norm": 1.5356756448745728,
14
  "learning_rate": 1.7722095671981778e-05,
15
- "loss": 0.1506,
16
  "step": 500
17
- },
18
- {
19
- "epoch": 1.0,
20
- "eval_accuracy": 0.9634930973676268,
21
- "eval_f1": 0.8442987330517893,
22
- "eval_loss": 0.13140565156936646,
23
- "eval_precision": 0.8387987192227007,
24
- "eval_recall": 0.849871350262893,
25
- "eval_runtime": 3.0691,
26
- "eval_samples_per_second": 1058.938,
27
- "eval_steps_per_second": 66.469,
28
- "step": 878
29
- },
30
- {
31
- "epoch": 1.1389521640091116,
32
- "grad_norm": 3.3906660079956055,
33
- "learning_rate": 1.5444191343963555e-05,
34
- "loss": 0.12,
35
- "step": 1000
36
- },
37
- {
38
- "epoch": 1.7084282460136673,
39
- "grad_norm": 6.772578239440918,
40
- "learning_rate": 1.3166287015945332e-05,
41
- "loss": 0.1046,
42
- "step": 1500
43
- },
44
- {
45
- "epoch": 2.0,
46
- "eval_accuracy": 0.9679412839372805,
47
- "eval_f1": 0.866677846721449,
48
- "eval_loss": 0.12156210839748383,
49
- "eval_precision": 0.866145251396648,
50
- "eval_recall": 0.8672110974381922,
51
- "eval_runtime": 3.0579,
52
- "eval_samples_per_second": 1062.822,
53
- "eval_steps_per_second": 66.712,
54
- "step": 1756
55
- },
56
- {
57
- "epoch": 2.277904328018223,
58
- "grad_norm": 8.046541213989258,
59
- "learning_rate": 1.0888382687927108e-05,
60
- "loss": 0.098,
61
- "step": 2000
62
- },
63
- {
64
- "epoch": 2.847380410022779,
65
- "grad_norm": 3.613861322402954,
66
- "learning_rate": 8.610478359908885e-06,
67
- "loss": 0.0905,
68
- "step": 2500
69
- },
70
- {
71
- "epoch": 3.0,
72
- "eval_accuracy": 0.9694981492366594,
73
- "eval_f1": 0.8722589241103648,
74
- "eval_loss": 0.11584340035915375,
75
- "eval_precision": 0.8657703328190435,
76
- "eval_recall": 0.878845508446135,
77
- "eval_runtime": 3.8011,
78
- "eval_samples_per_second": 855.007,
79
- "eval_steps_per_second": 53.668,
80
- "step": 2634
81
- },
82
- {
83
- "epoch": 3.416856492027335,
84
- "grad_norm": 3.454561471939087,
85
- "learning_rate": 6.3325740318906616e-06,
86
- "loss": 0.0841,
87
- "step": 3000
88
- },
89
- {
90
- "epoch": 3.9863325740318905,
91
- "grad_norm": 7.105097770690918,
92
- "learning_rate": 4.054669703872437e-06,
93
- "loss": 0.076,
94
- "step": 3500
95
- },
96
- {
97
- "epoch": 4.0,
98
- "eval_accuracy": 0.9696570130427185,
99
- "eval_f1": 0.8694353812973511,
100
- "eval_loss": 0.11094123870134354,
101
- "eval_precision": 0.8596894138232721,
102
- "eval_recall": 0.8794048551292091,
103
- "eval_runtime": 4.8501,
104
- "eval_samples_per_second": 670.083,
105
- "eval_steps_per_second": 42.061,
106
- "step": 3512
107
- },
108
- {
109
- "epoch": 4.555808656036446,
110
- "grad_norm": 4.661518096923828,
111
- "learning_rate": 1.7767653758542143e-06,
112
- "loss": 0.0741,
113
- "step": 4000
114
  }
115
  ],
116
  "logging_steps": 500,
@@ -125,12 +28,12 @@
125
  "should_evaluate": false,
126
  "should_log": false,
127
  "should_save": true,
128
- "should_training_stop": true
129
  },
130
  "attributes": {}
131
  }
132
  },
133
- "total_flos": 91500454459296.0,
134
  "train_batch_size": 16,
135
  "trial_name": null,
136
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5694760820045558,
5
  "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.5694760820045558,
13
+ "grad_norm": 1.5650427341461182,
14
  "learning_rate": 1.7722095671981778e-05,
15
+ "loss": 0.5173,
16
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  }
18
  ],
19
  "logging_steps": 500,
 
28
  "should_evaluate": false,
29
  "should_log": false,
30
  "should_save": true,
31
+ "should_training_stop": false
32
  },
33
  "attributes": {}
34
  }
35
  },
36
+ "total_flos": 10392521743584.0,
37
  "train_batch_size": 16,
38
  "trial_name": null,
39
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6179295ed33670bef4e6ae443db6c86535ebdf6636bdd940a34e8a5e5cd430e0
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdba20217acc5727d371bb2b4ee86307f247f3c56144c2ceaef9f29f3d06f144
3
  size 5304