kbberendsen committed on
Commit
dcb78fe
1 Parent(s): 821263a

Training in progress, epoch 1

Browse files
config.json CHANGED
@@ -1,25 +1,25 @@
1
  {
2
- "_name_or_path": "microsoft/deberta-v3-large",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
- "hidden_size": 1024,
10
  "initializer_range": 0.02,
11
- "intermediate_size": 4096,
12
  "layer_norm_eps": 1e-07,
13
  "max_position_embeddings": 512,
14
  "max_relative_positions": -1,
15
  "model_type": "deberta-v2",
16
  "norm_rel_ebd": "layer_norm",
17
- "num_attention_heads": 16,
18
- "num_hidden_layers": 24,
19
  "pad_token_id": 0,
20
  "pooler_dropout": 0,
21
  "pooler_hidden_act": "gelu",
22
- "pooler_hidden_size": 1024,
23
  "pos_att_type": [
24
  "p2c",
25
  "c2p"
 
1
  {
2
+ "_name_or_path": "microsoft/deberta-v3-base",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
  "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
  "layer_norm_eps": 1e-07,
13
  "max_position_embeddings": 512,
14
  "max_relative_positions": -1,
15
  "model_type": "deberta-v2",
16
  "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
  "pad_token_id": 0,
20
  "pooler_dropout": 0,
21
  "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 768,
23
  "pos_att_type": [
24
  "p2c",
25
  "c2p"
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db25cee7efaa01864d9a77a4f74d0c9d2326018ba142cbe1a036d5a07c453429
3
- size 1740304440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed1ca670602465a09e34815d2a91e895491af587d899f2f9221d046f8418e488
3
+ size 737719272
run-0/checkpoint-27/config.json CHANGED
@@ -1,25 +1,25 @@
1
  {
2
- "_name_or_path": "microsoft/deberta-v3-large",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
- "hidden_size": 1024,
10
  "initializer_range": 0.02,
11
- "intermediate_size": 4096,
12
  "layer_norm_eps": 1e-07,
13
  "max_position_embeddings": 512,
14
  "max_relative_positions": -1,
15
  "model_type": "deberta-v2",
16
  "norm_rel_ebd": "layer_norm",
17
- "num_attention_heads": 16,
18
- "num_hidden_layers": 24,
19
  "pad_token_id": 0,
20
  "pooler_dropout": 0,
21
  "pooler_hidden_act": "gelu",
22
- "pooler_hidden_size": 1024,
23
  "pos_att_type": [
24
  "p2c",
25
  "c2p"
 
1
  {
2
+ "_name_or_path": "microsoft/deberta-v3-base",
3
  "architectures": [
4
  "DebertaV2ForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
  "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
  "layer_norm_eps": 1e-07,
13
  "max_position_embeddings": 512,
14
  "max_relative_positions": -1,
15
  "model_type": "deberta-v2",
16
  "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
  "pad_token_id": 0,
20
  "pooler_dropout": 0,
21
  "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 768,
23
  "pos_att_type": [
24
  "p2c",
25
  "c2p"
run-0/checkpoint-27/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:362787337e763ae671c0d6f586075e16ed1ee0b7814d9f4a66bd9c573b910b06
3
- size 1740304440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed1ca670602465a09e34815d2a91e895491af587d899f2f9221d046f8418e488
3
+ size 737719272
run-0/checkpoint-27/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8bf148f031a6b40ebc0a1b878fd399b7041c0572404062d16f4c6a0f8e25600d
3
- size 3480840240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9f7fa6b21ebbdc8b9fc9c6bc85aee0d8e8ae05efc4a1b8c3c87b707a3ed0907
3
+ size 1475558394
run-0/checkpoint-27/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70a0155120298931ad746a4bb00fd0abdd59a8850305e017cf843b2bc5e7b5b0
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0fce9eb55c0fdd6af139024ee02e7031cd5bd6b03707d2bb5484decd5f5b448
3
+ size 14308
run-0/checkpoint-27/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b332e5aa1f9a47941437dcec4e5a39d6bd8a90d334761d98098129d5846b2dbe
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:029be0abaf4172d8d79d3fe3569afed060aca3e4a5ee2358fb92bfcebf064181
3
  size 1064
run-0/checkpoint-27/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.0,
3
  "best_model_checkpoint": "deberta-v3-large-finetuned-cola-midterm/run-0/checkpoint-27",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -10,11 +10,11 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.6029512882232666,
14
- "eval_matthews_correlation": 0.0,
15
- "eval_runtime": 8.1359,
16
- "eval_samples_per_second": 128.197,
17
- "eval_steps_per_second": 8.112,
18
  "step": 27
19
  }
20
  ],
@@ -24,12 +24,12 @@
24
  "num_train_epochs": 2,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
- "train_batch_size": 32,
28
  "trial_name": null,
29
  "trial_params": {
30
- "learning_rate": 4.26417580168616e-06,
31
  "num_train_epochs": 2,
32
- "per_device_train_batch_size": 32,
33
- "seed": 3
34
  }
35
  }
 
1
  {
2
+ "best_metric": 0.048639888821286496,
3
  "best_model_checkpoint": "deberta-v3-large-finetuned-cola-midterm/run-0/checkpoint-27",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.691221296787262,
14
+ "eval_matthews_correlation": 0.048639888821286496,
15
+ "eval_runtime": 2.4651,
16
+ "eval_samples_per_second": 423.102,
17
+ "eval_steps_per_second": 26.773,
18
  "step": 27
19
  }
20
  ],
 
24
  "num_train_epochs": 2,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
+ "train_batch_size": 16,
28
  "trial_name": null,
29
  "trial_params": {
30
+ "learning_rate": 1.295113530605781e-06,
31
  "num_train_epochs": 2,
32
+ "per_device_train_batch_size": 16,
33
+ "seed": 29
34
  }
35
  }
run-0/checkpoint-27/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b511e0093eff2e18a9a8913c53e4c6377789256044eb610b864499c14d17e4f5
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ca2192d898bc34b22b60030a4c304d30c36cac0af471e752e5be729bc08981c
3
  size 4984
runs/Feb29_12-55-06_e1aa4b7a2e4c/events.out.tfevents.1709211387.e1aa4b7a2e4c.252.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bafa5acd0f7ee6a2073f29d9b20884b1390f9674e2c8893f5a872a7bd8342ac
3
+ size 5812
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:809a0430adc83b27e312907f1dee6ad0edefa61c1f8ccdf999f9172f36110b1e
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ca2192d898bc34b22b60030a4c304d30c36cac0af471e752e5be729bc08981c
3
  size 4984