sharkMeow committed on
Commit
66b8514
·
verified ·
1 Parent(s): 2649864

End of training

Browse files
Files changed (5) hide show
  1. README.md +5 -1
  2. all_results.json +10 -10
  3. eval_results.json +5 -5
  4. train_results.json +6 -6
  5. trainer_state.json +34 -13
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  model-index:
@@ -11,7 +13,9 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # clip-roberta-finetuned
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
+ license: gpl-3.0
3
+ base_model: ckiplab/bert-base-chinese
4
  tags:
5
  - generated_from_trainer
6
  model-index:
 
13
 
14
  # clip-roberta-finetuned
15
 
16
+ This model is a fine-tuned version of [ckiplab/bert-base-chinese](https://huggingface.co/ckiplab/bert-base-chinese) on an unknown dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 0.0543
19
 
20
  ## Model description
21
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 2.635524034500122,
4
- "eval_runtime": 3.7818,
5
- "eval_samples_per_second": 331.589,
6
- "eval_steps_per_second": 5.289,
7
- "total_flos": 1658811247948800.0,
8
- "train_loss": 3.298740234375,
9
- "train_runtime": 93.9998,
10
- "train_samples_per_second": 133.405,
11
- "train_steps_per_second": 2.128
12
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.054326437413692474,
4
+ "eval_runtime": 3.844,
5
+ "eval_samples_per_second": 326.223,
6
+ "eval_steps_per_second": 3.382,
7
+ "total_flos": 1.6588112479488e+16,
8
+ "train_loss": 0.4321923828125,
9
+ "train_runtime": 849.2414,
10
+ "train_samples_per_second": 147.661,
11
+ "train_steps_per_second": 1.884
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 2.635524034500122,
4
- "eval_runtime": 3.7818,
5
- "eval_samples_per_second": 331.589,
6
- "eval_steps_per_second": 5.289
7
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.054326437413692474,
4
+ "eval_runtime": 3.844,
5
+ "eval_samples_per_second": 326.223,
6
+ "eval_steps_per_second": 3.382
7
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 1658811247948800.0,
4
- "train_loss": 3.298740234375,
5
- "train_runtime": 93.9998,
6
- "train_samples_per_second": 133.405,
7
- "train_steps_per_second": 2.128
8
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "total_flos": 1.6588112479488e+16,
4
+ "train_loss": 0.4321923828125,
5
+ "train_runtime": 849.2414,
6
+ "train_samples_per_second": 147.661,
7
+ "train_steps_per_second": 1.884
8
  }
trainer_state.json CHANGED
@@ -1,27 +1,48 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 10.0,
13
- "step": 200,
14
- "total_flos": 1658811247948800.0,
15
- "train_loss": 3.298740234375,
16
- "train_runtime": 93.9998,
17
- "train_samples_per_second": 133.405,
18
- "train_steps_per_second": 2.128
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "logging_steps": 500,
22
- "max_steps": 200,
23
  "num_input_tokens_seen": 0,
24
- "num_train_epochs": 10,
25
  "save_steps": 500,
26
  "stateful_callbacks": {
27
  "TrainerControl": {
@@ -35,8 +56,8 @@
35
  "attributes": {}
36
  }
37
  },
38
- "total_flos": 1658811247948800.0,
39
- "train_batch_size": 64,
40
  "trial_name": null,
41
  "trial_params": null
42
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 100.0,
5
  "eval_steps": 500,
6
+ "global_step": 1600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 31.25,
13
+ "grad_norm": 1.9496452808380127,
14
+ "learning_rate": 3.4375e-05,
15
+ "loss": 1.2881,
16
+ "step": 500
17
+ },
18
+ {
19
+ "epoch": 62.5,
20
+ "grad_norm": 0.6210575699806213,
21
+ "learning_rate": 1.8750000000000002e-05,
22
+ "loss": 0.0667,
23
+ "step": 1000
24
+ },
25
+ {
26
+ "epoch": 93.75,
27
+ "grad_norm": 0.8036883473396301,
28
+ "learning_rate": 3.125e-06,
29
+ "loss": 0.0245,
30
+ "step": 1500
31
+ },
32
+ {
33
+ "epoch": 100.0,
34
+ "step": 1600,
35
+ "total_flos": 1.6588112479488e+16,
36
+ "train_loss": 0.4321923828125,
37
+ "train_runtime": 849.2414,
38
+ "train_samples_per_second": 147.661,
39
+ "train_steps_per_second": 1.884
40
  }
41
  ],
42
  "logging_steps": 500,
43
+ "max_steps": 1600,
44
  "num_input_tokens_seen": 0,
45
+ "num_train_epochs": 100,
46
  "save_steps": 500,
47
  "stateful_callbacks": {
48
  "TrainerControl": {
 
56
  "attributes": {}
57
  }
58
  },
59
+ "total_flos": 1.6588112479488e+16,
60
+ "train_batch_size": 80,
61
  "trial_name": null,
62
  "trial_params": null
63
  }