Cheng98 committed on
Commit
a1b1c34
1 Parent(s): bcab7db

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -32,7 +32,7 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [facebook/opt-350m](https://huggingface.co/facebook/opt-350m) on the GLUE RTE dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 2.1042
36
  - Accuracy: 0.7112
37
 
38
  ## Model description
@@ -53,7 +53,7 @@ More information needed
53
 
54
  The following hyperparameters were used during training:
55
  - learning_rate: 2e-05
56
- - train_batch_size: 32
57
  - eval_batch_size: 8
58
  - seed: 42
59
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 
32
 
33
  This model is a fine-tuned version of [facebook/opt-350m](https://huggingface.co/facebook/opt-350m) on the GLUE RTE dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 2.0566
36
  - Accuracy: 0.7112
37
 
38
  ## Model description
 
53
 
54
  The following hyperparameters were used during training:
55
  - learning_rate: 2e-05
56
+ - train_batch_size: 16
57
  - eval_batch_size: 8
58
  - seed: 42
59
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 5.0,
3
  "eval_accuracy": 0.7111913357400722,
4
- "eval_loss": 2.1042087078094482,
5
- "eval_runtime": 1.5849,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 174.772,
8
- "eval_steps_per_second": 22.083,
9
- "train_loss": 0.30048399705153245,
10
- "train_runtime": 200.6393,
11
  "train_samples": 2490,
12
- "train_samples_per_second": 62.052,
13
- "train_steps_per_second": 1.944
14
  }
 
1
  {
2
  "epoch": 5.0,
3
  "eval_accuracy": 0.7111913357400722,
4
+ "eval_loss": 2.056576728820801,
5
+ "eval_runtime": 0.784,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 353.301,
8
+ "eval_steps_per_second": 44.641,
9
+ "train_loss": 0.321053017102755,
10
+ "train_runtime": 84.2492,
11
  "train_samples": 2490,
12
+ "train_samples_per_second": 147.776,
13
+ "train_steps_per_second": 9.258
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
  "eval_accuracy": 0.7111913357400722,
4
- "eval_loss": 2.1042087078094482,
5
- "eval_runtime": 1.5849,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 174.772,
8
- "eval_steps_per_second": 22.083
9
  }
 
1
  {
2
  "epoch": 5.0,
3
  "eval_accuracy": 0.7111913357400722,
4
+ "eval_loss": 2.056576728820801,
5
+ "eval_runtime": 0.784,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 353.301,
8
+ "eval_steps_per_second": 44.641
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b358536980e2f2c0deac46a206a0542b52e165e42423a83c7316fe8553ef30c7
3
  size 1324921569
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70978ed91bc8237a364f1abc9663e9a21c1845bf6e97bd2a8648f64ca83c3d1e
3
  size 1324921569
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.30048399705153245,
4
- "train_runtime": 200.6393,
5
  "train_samples": 2490,
6
- "train_samples_per_second": 62.052,
7
- "train_steps_per_second": 1.944
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 0.321053017102755,
4
+ "train_runtime": 84.2492,
5
  "train_samples": 2490,
6
+ "train_samples_per_second": 147.776,
7
+ "train_steps_per_second": 9.258
8
  }
trainer_state.json CHANGED
@@ -2,22 +2,28 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
- "global_step": 390,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
10
  {
11
  "epoch": 5.0,
12
- "step": 390,
13
  "total_flos": 2900597184921600.0,
14
- "train_loss": 0.30048399705153245,
15
- "train_runtime": 200.6393,
16
- "train_samples_per_second": 62.052,
17
- "train_steps_per_second": 1.944
18
  }
19
  ],
20
- "max_steps": 390,
21
  "num_train_epochs": 5,
22
  "total_flos": 2900597184921600.0,
23
  "trial_name": null,
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
+ "global_step": 780,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 3.21,
12
+ "learning_rate": 7.282051282051282e-06,
13
+ "loss": 0.47,
14
+ "step": 500
15
+ },
16
  {
17
  "epoch": 5.0,
18
+ "step": 780,
19
  "total_flos": 2900597184921600.0,
20
+ "train_loss": 0.321053017102755,
21
+ "train_runtime": 84.2492,
22
+ "train_samples_per_second": 147.776,
23
+ "train_steps_per_second": 9.258
24
  }
25
  ],
26
+ "max_steps": 780,
27
  "num_train_epochs": 5,
28
  "total_flos": 2900597184921600.0,
29
  "trial_name": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b3b22874f2da3cca2923e809d12e9c08b60ee574321286e71bb5196c3407c8e
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59537fe8b1e8f4d9b7dbdb8d5995dcdb60103dcce782bee474e7b5ed99736261
3
  size 3963