peterbeamish committed on
Commit
db80c23
1 Parent(s): 9a423c5

End of training

Browse files
README.md CHANGED
@@ -32,12 +32,12 @@ More information needed
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
- - learning_rate: 0.0006741100367675095
36
- - train_batch_size: 52
37
  - eval_batch_size: 8
38
  - seed: 42
39
  - gradient_accumulation_steps: 5
40
- - total_train_batch_size: 260
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
  - num_epochs: 4
@@ -46,7 +46,7 @@ The following hyperparameters were used during training:
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
- | No log | 2.43 | 100 | 0.9711 |
50
 
51
 
52
  ### Framework versions
 
32
  ### Training hyperparameters
33
 
34
  The following hyperparameters were used during training:
35
+ - learning_rate: 0.00023571124327298023
36
+ - train_batch_size: 60
37
  - eval_batch_size: 8
38
  - seed: 42
39
  - gradient_accumulation_steps: 5
40
+ - total_train_batch_size: 300
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
  - num_epochs: 4
 
46
 
47
  | Training Loss | Epoch | Step | Validation Loss |
48
  |:-------------:|:-----:|:----:|:---------------:|
49
+ | No log | 2.79 | 100 | 1.1266 |
50
 
51
 
52
  ### Framework versions
adapter_config.json CHANGED
@@ -16,8 +16,8 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "v",
20
- "q"
21
  ],
22
  "task_type": "SEQ_2_SEQ_LM"
23
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "q",
20
+ "v"
21
  ],
22
  "task_type": "SEQ_2_SEQ_LM"
23
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d1b2057d531f2a5429fcc79d54a8884c543e3f0e7359a624ad94a407a665485
3
  size 18915328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40ae13711796b112ed4cc4860b5077af77d71b8202d675b792ec9d67e8ccfb54
3
  size 18915328
runs/Nov07_02-22-21_147bbb1e1699/events.out.tfevents.1699323745.147bbb1e1699.4282.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a4002f80d1157e39d6460587c65a92e45067304c1a8d1609bfeac18d0c83377
3
+ size 5396
trainer_state.json CHANGED
@@ -1,36 +1,36 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.9805825242718447,
5
  "eval_steps": 100,
6
- "global_step": 164,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 2.43,
13
- "eval_loss": 0.9710711240768433,
14
- "eval_runtime": 1487.3043,
15
- "eval_samples_per_second": 7.203,
16
- "eval_steps_per_second": 0.901,
17
  "step": 100
18
  },
19
  {
20
- "epoch": 3.98,
21
- "step": 164,
22
- "total_flos": 9.889339416772608e+16,
23
- "train_loss": 1.1787047967678164,
24
- "train_runtime": 21689.2536,
25
- "train_samples_per_second": 1.976,
26
- "train_steps_per_second": 0.008
27
  }
28
  ],
29
  "logging_steps": 500,
30
- "max_steps": 164,
31
  "num_train_epochs": 4,
32
  "save_steps": 500,
33
- "total_flos": 9.889339416772608e+16,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.910614525139665,
5
  "eval_steps": 100,
6
+ "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 2.79,
13
+ "eval_loss": 1.1265525817871094,
14
+ "eval_runtime": 577.4869,
15
+ "eval_samples_per_second": 18.551,
16
+ "eval_steps_per_second": 2.32,
17
  "step": 100
18
  },
19
  {
20
+ "epoch": 3.91,
21
+ "step": 140,
22
+ "total_flos": 9.721424741872435e+16,
23
+ "train_loss": 1.4816603524344307,
24
+ "train_runtime": 4588.7784,
25
+ "train_samples_per_second": 9.338,
26
+ "train_steps_per_second": 0.031
27
  }
28
  ],
29
  "logging_steps": 500,
30
+ "max_steps": 140,
31
  "num_train_epochs": 4,
32
  "save_steps": 500,
33
+ "total_flos": 9.721424741872435e+16,
34
  "trial_name": null,
35
  "trial_params": null
36
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3afdb64a91e6b71d89a1d36421804cef1fa3e521341ea328400dcf79b3449517
3
  size 4536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d32b53d138eb2a55a3f8fe697ea898dda14eb2e0e6691f38f463e75fc31d59
3
  size 4536