MartaTT committed on
Commit
0c1a358
1 Parent(s): 20f2e2e

Upload 6 files

Browse files
Files changed (6) hide show
  1. config.json +25 -0
  2. model.safetensors +3 -0
  3. optimizer.pt +3 -0
  4. rng_state.pth +3 -0
  5. scheduler.pt +3 -0
  6. trainer_state.json +152 -0
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.44.2",
24
+ "vocab_size": 30522
25
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e9d0a5e250b579ca4a6b181ff635893ed904c6e1a20e062a9627c6fee756172
3
+ size 267832560
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5818b4eafc7aed5ce0d80b9b9b3604c2b8620fb5f73514a24be0534d141f2f63
3
+ size 535724410
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fb67e304d0a8a57ca5798c7c6d690df7228013fc1fb4bf29228391465a9e39d
3
+ size 13990
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2da80b707cd6dabf58a246469e5d432855cb6b25908ad62d5f9251735c30be
3
+ size 1064
trainer_state.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
+ "eval_steps": 500,
6
+ "global_step": 945,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.0049484120681881905,
14
+ "eval_runtime": 86.2189,
15
+ "eval_samples_per_second": 11.598,
16
+ "eval_steps_per_second": 0.731,
17
+ "step": 63
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_loss": 0.0015708731953054667,
22
+ "eval_runtime": 82.6736,
23
+ "eval_samples_per_second": 12.096,
24
+ "eval_steps_per_second": 0.762,
25
+ "step": 126
26
+ },
27
+ {
28
+ "epoch": 3.0,
29
+ "eval_loss": 0.0008609762298874557,
30
+ "eval_runtime": 86.3582,
31
+ "eval_samples_per_second": 11.58,
32
+ "eval_steps_per_second": 0.73,
33
+ "step": 189
34
+ },
35
+ {
36
+ "epoch": 4.0,
37
+ "eval_loss": 0.0005577785195782781,
38
+ "eval_runtime": 83.2641,
39
+ "eval_samples_per_second": 12.01,
40
+ "eval_steps_per_second": 0.757,
41
+ "step": 252
42
+ },
43
+ {
44
+ "epoch": 5.0,
45
+ "eval_loss": 0.0003976120497100055,
46
+ "eval_runtime": 120.3456,
47
+ "eval_samples_per_second": 8.309,
48
+ "eval_steps_per_second": 0.523,
49
+ "step": 315
50
+ },
51
+ {
52
+ "epoch": 6.0,
53
+ "eval_loss": 0.00029855340835638344,
54
+ "eval_runtime": 1114.9335,
55
+ "eval_samples_per_second": 0.897,
56
+ "eval_steps_per_second": 0.057,
57
+ "step": 378
58
+ },
59
+ {
60
+ "epoch": 7.0,
61
+ "eval_loss": 0.00023258940200321376,
62
+ "eval_runtime": 1077.4431,
63
+ "eval_samples_per_second": 0.928,
64
+ "eval_steps_per_second": 0.058,
65
+ "step": 441
66
+ },
67
+ {
68
+ "epoch": 7.936507936507937,
69
+ "grad_norm": 0.00615890184417367,
70
+ "learning_rate": 9.417989417989418e-06,
71
+ "loss": 0.0229,
72
+ "step": 500
73
+ },
74
+ {
75
+ "epoch": 8.0,
76
+ "eval_loss": 0.0001905104873003438,
77
+ "eval_runtime": 92.7233,
78
+ "eval_samples_per_second": 10.785,
79
+ "eval_steps_per_second": 0.679,
80
+ "step": 504
81
+ },
82
+ {
83
+ "epoch": 9.0,
84
+ "eval_loss": 0.00016125467664096504,
85
+ "eval_runtime": 157.6217,
86
+ "eval_samples_per_second": 6.344,
87
+ "eval_steps_per_second": 0.4,
88
+ "step": 567
89
+ },
90
+ {
91
+ "epoch": 10.0,
92
+ "eval_loss": 0.00014133936201687902,
93
+ "eval_runtime": 253.9397,
94
+ "eval_samples_per_second": 3.938,
95
+ "eval_steps_per_second": 0.248,
96
+ "step": 630
97
+ },
98
+ {
99
+ "epoch": 11.0,
100
+ "eval_loss": 0.00012699734361376613,
101
+ "eval_runtime": 290.6194,
102
+ "eval_samples_per_second": 3.441,
103
+ "eval_steps_per_second": 0.217,
104
+ "step": 693
105
+ },
106
+ {
107
+ "epoch": 12.0,
108
+ "eval_loss": 0.00011713778803823516,
109
+ "eval_runtime": 89.5033,
110
+ "eval_samples_per_second": 11.173,
111
+ "eval_steps_per_second": 0.704,
112
+ "step": 756
113
+ },
114
+ {
115
+ "epoch": 13.0,
116
+ "eval_loss": 0.00011058501695515588,
117
+ "eval_runtime": 81.9569,
118
+ "eval_samples_per_second": 12.202,
119
+ "eval_steps_per_second": 0.769,
120
+ "step": 819
121
+ },
122
+ {
123
+ "epoch": 14.0,
124
+ "eval_loss": 0.00010690685303416103,
125
+ "eval_runtime": 82.071,
126
+ "eval_samples_per_second": 12.185,
127
+ "eval_steps_per_second": 0.768,
128
+ "step": 882
129
+ }
130
+ ],
131
+ "logging_steps": 500,
132
+ "max_steps": 945,
133
+ "num_input_tokens_seen": 0,
134
+ "num_train_epochs": 15,
135
+ "save_steps": 500,
136
+ "stateful_callbacks": {
137
+ "TrainerControl": {
138
+ "args": {
139
+ "should_epoch_stop": false,
140
+ "should_evaluate": false,
141
+ "should_log": false,
142
+ "should_save": true,
143
+ "should_training_stop": true
144
+ },
145
+ "attributes": {}
146
+ }
147
+ },
148
+ "total_flos": 1987010979840000.0,
149
+ "train_batch_size": 16,
150
+ "trial_name": null,
151
+ "trial_params": null
152
+ }