Holmeister committed
Commit 996f010
1 Parent(s): ee49dd4

End of training

Files changed (5):
  1. README.md +3 -2
  2. all_results.json +12 -0
  3. eval_results.json +7 -0
  4. train_results.json +8 -0
  5. trainer_state.json +141 -0
README.md CHANGED
@@ -3,6 +3,7 @@ license: bigscience-bloom-rail-1.0
  library_name: peft
  tags:
  - llama-factory
+ - lora
  - generated_from_trainer
  base_model: bigscience/bloom-7b1
  model-index:
@@ -15,9 +16,9 @@ should probably proofread and complete it, then remove this comment. -->

  # BLOOM_AAID_structured_train_final_last

- This model is a fine-tuned version of [bigscience/bloom-7b1](https://huggingface.co/bigscience/bloom-7b1) on an unknown dataset.
+ This model is a fine-tuned version of [bigscience/bloom-7b1](https://huggingface.co/bigscience/bloom-7b1) on the AAID_structured dataset.
  It achieves the following results on the evaluation set:
- - Loss: 1.3369
+ - Loss: 0.8413

  ## Model description

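The updated README describes a PEFT LoRA adapter trained on top of bigscience/bloom-7b1 with LLaMA-Factory. A minimal loading sketch follows; the adapter repository id is an assumption inferred from the model name and committer, so treat it as a placeholder.

```python
# Minimal sketch: apply the LoRA adapter from this repo to the BLOOM-7b1 base model.
# The adapter id below is assumed from the model name/committer and may not match the
# actual repository path; device_map="auto" additionally requires `accelerate`.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "bigscience/bloom-7b1"
adapter_id = "Holmeister/BLOOM_AAID_structured_train_final_last"  # hypothetical repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)

prompt = "Example instruction from the AAID_structured task"  # placeholder prompt
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```
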
all_results.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "epoch": 1.3125512715340442,
+   "eval_loss": 0.8413172364234924,
+   "eval_runtime": 2017.3822,
+   "eval_samples_per_second": 23.357,
+   "eval_steps_per_second": 0.73,
+   "total_flos": 1.7087206139898102e+18,
+   "train_loss": 0.2976297422250112,
+   "train_runtime": 65664.4367,
+   "train_samples_per_second": 10.693,
+   "train_steps_per_second": 0.042
+ }
eval_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "epoch": 1.3125512715340442,
+   "eval_loss": 0.8413172364234924,
+   "eval_runtime": 2017.3822,
+   "eval_samples_per_second": 23.357,
+   "eval_steps_per_second": 0.73
+ }
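
For scale, these throughput figures imply an evaluation set of roughly eval_runtime × eval_samples_per_second ≈ 2017.38 s × 23.357 samples/s ≈ 47,100 examples, evaluated in batches of about 23.357 / 0.73 ≈ 32 samples per step.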
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 1.3125512715340442,
+   "total_flos": 1.7087206139898102e+18,
+   "train_loss": 0.2976297422250112,
+   "train_runtime": 65664.4367,
+   "train_samples_per_second": 10.693,
+   "train_steps_per_second": 0.042
+ }
trainer_state.json ADDED
@@ -0,0 +1,141 @@
+ {
+   "best_metric": 0.8413172364234924,
+   "best_model_checkpoint": "saves/BLOOM-7B/lora/train_1/checkpoint-200",
+   "epoch": 1.3125512715340442,
+   "eval_steps": 200,
+   "global_step": 1200,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.21875854525567404,
+       "grad_norm": 0.41011762619018555,
+       "learning_rate": 0.00029856911617379416,
+       "loss": 0.817,
+       "step": 200
+     },
+     {
+       "epoch": 0.21875854525567404,
+       "eval_loss": 0.8413172364234924,
+       "eval_runtime": 2018.9188,
+       "eval_samples_per_second": 23.339,
+       "eval_steps_per_second": 0.73,
+       "step": 200
+     },
+     {
+       "epoch": 0.4375170905113481,
+       "grad_norm": 0.5003934502601624,
+       "learning_rate": 0.0002896017519370078,
+       "loss": 0.381,
+       "step": 400
+     },
+     {
+       "epoch": 0.4375170905113481,
+       "eval_loss": 0.8805686831474304,
+       "eval_runtime": 2017.6263,
+       "eval_samples_per_second": 23.354,
+       "eval_steps_per_second": 0.73,
+       "step": 400
+     },
+     {
+       "epoch": 0.6562756357670222,
+       "grad_norm": 0.671746551990509,
+       "learning_rate": 0.0002728756302319302,
+       "loss": 0.3025,
+       "step": 600
+     },
+     {
+       "epoch": 0.6562756357670222,
+       "eval_loss": 1.017870545387268,
+       "eval_runtime": 2018.3325,
+       "eval_samples_per_second": 23.346,
+       "eval_steps_per_second": 0.73,
+       "step": 600
+     },
+     {
+       "epoch": 0.8750341810226961,
+       "grad_norm": 0.7479655146598816,
+       "learning_rate": 0.00024932035201194605,
+       "loss": 0.1879,
+       "step": 800
+     },
+     {
+       "epoch": 0.8750341810226961,
+       "eval_loss": 1.1555241346359253,
+       "eval_runtime": 2018.658,
+       "eval_samples_per_second": 23.342,
+       "eval_steps_per_second": 0.73,
+       "step": 800
+     },
+     {
+       "epoch": 1.0937927262783702,
+       "grad_norm": 0.6166426539421082,
+       "learning_rate": 0.00022024506768721243,
+       "loss": 0.0744,
+       "step": 1000
+     },
+     {
+       "epoch": 1.0937927262783702,
+       "eval_loss": 1.2785167694091797,
+       "eval_runtime": 2018.1701,
+       "eval_samples_per_second": 23.348,
+       "eval_steps_per_second": 0.73,
+       "step": 1000
+     },
+     {
+       "epoch": 1.3125512715340442,
+       "grad_norm": 0.3912193179130554,
+       "learning_rate": 0.0001872657174323126,
+       "loss": 0.0231,
+       "step": 1200
+     },
+     {
+       "epoch": 1.3125512715340442,
+       "eval_loss": 1.336869478225708,
+       "eval_runtime": 2018.0932,
+       "eval_samples_per_second": 23.349,
+       "eval_steps_per_second": 0.73,
+       "step": 1200
+     },
+     {
+       "epoch": 1.3125512715340442,
+       "step": 1200,
+       "total_flos": 1.7087206139898102e+18,
+       "train_loss": 0.2976297422250112,
+       "train_runtime": 65664.4367,
+       "train_samples_per_second": 10.693,
+       "train_steps_per_second": 0.042
+     }
+   ],
+   "logging_steps": 200,
+   "max_steps": 2742,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 200,
+   "stateful_callbacks": {
+     "EarlyStoppingCallback": {
+       "args": {
+         "early_stopping_patience": 5,
+         "early_stopping_threshold": 0.0
+       },
+       "attributes": {
+         "early_stopping_patience_counter": 0
+       }
+     },
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1.7087206139898102e+18,
+   "train_batch_size": 32,
+   "trial_name": null,
+   "trial_params": null
+ }
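
trainer_state.json records evaluation and checkpointing every 200 steps with early stopping (patience 5, threshold 0.0): eval_loss was lowest at step 200 (0.8413, the value now reported in the README) and rose for the next five evaluations, so training stopped at step 1200 of a planned 2742. A minimal sketch of that evaluation/early-stopping setup with the transformers Trainer API is below; the run was actually launched through LLaMA-Factory, and the peak learning rate is an assumption inferred from the logged schedule.

```python
# Sketch of the evaluation / early-stopping cadence recorded in trainer_state.json.
# The actual run was launched via LLaMA-Factory; values marked "assumed" are inferred,
# not read from this commit.
from transformers import TrainingArguments, EarlyStoppingCallback

args = TrainingArguments(
    output_dir="saves/BLOOM-7B/lora/train_1",
    per_device_train_batch_size=32,    # "train_batch_size": 32
    num_train_epochs=3,                # "num_train_epochs": 3
    learning_rate=3e-4,                # assumed peak LR, consistent with the logged schedule
    eval_strategy="steps",             # older transformers versions spell this evaluation_strategy
    eval_steps=200,                    # "eval_steps": 200
    save_steps=200,                    # "save_steps": 200
    logging_steps=200,                 # "logging_steps": 200
    load_best_model_at_end=True,       # lets the callback track the best eval_loss checkpoint
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)

early_stop = EarlyStoppingCallback(
    early_stopping_patience=5,         # "early_stopping_patience": 5
    early_stopping_threshold=0.0,      # "early_stopping_threshold": 0.0
)

# Passed to Trainer(..., callbacks=[early_stop]); with the eval_loss curve above it fires
# after step 1200: five evaluations (steps 400-1200) without improvement over step 200.
```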