Sven00 committed on
Commit
f1be7a6
1 Parent(s): b5a2f9f

Upload 14 files

Browse files
redpj7B-lora-cnn-dailymail-results_10/checkpoint-200/adapter_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "/domino/edv/afs-mrmc-data-store-rw/innovation/hf/RedPajama-INCITE-7B-Base",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lora_weights": true,
7
+ "lora_alpha": 16,
8
+ "lora_dropout": 0.05,
9
+ "modules_to_save": null,
10
+ "peft_type": "LORA",
11
+ "r": 8,
12
+ "target_modules": [
13
+ "query_key_value"
14
+ ],
15
+ "task_type": "CAUSAL_LM"
16
+ }
redpj7B-lora-cnn-dailymail-results_10/checkpoint-200/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b47162c9c1217e296a97387e5e0eb5ed2df80aab1b6860fb83c03b0523e15cad
3
+ size 16800753
redpj7B-lora-cnn-dailymail-results_10/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d009bbc725e80e03b7d0537b3c681e631dea889a4c2d98df3fcac73b9ec3c1
3
+ size 33592261
redpj7B-lora-cnn-dailymail-results_10/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a3a34d8b7f1d6505270f74d6050bb998ab2c3c2d5999a4ddc0f07dbd04f62f
3
+ size 14575
redpj7B-lora-cnn-dailymail-results_10/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24cd1b551d8302b040b159cd8f0235934a4a57ad1f8c440a889eb87a9abfb8fe
3
+ size 627
redpj7B-lora-cnn-dailymail-results_10/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.8871846199035645,
3
+ "best_model_checkpoint": "/domino/edv/afs-mrmc-data-store-rw/innovation/hf/redpj7B-lora-cnn-dailymail-results_10/checkpoint-200",
4
+ "epoch": 1.3888888888888888,
5
+ "global_step": 200,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.14,
12
+ "learning_rate": 0.0,
13
+ "loss": 1.8996,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.28,
18
+ "learning_rate": 0.0,
19
+ "loss": 1.8773,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.42,
24
+ "learning_rate": 0.0,
25
+ "loss": 1.8595,
26
+ "step": 60
27
+ },
28
+ {
29
+ "epoch": 0.56,
30
+ "learning_rate": 0.0,
31
+ "loss": 1.8621,
32
+ "step": 80
33
+ },
34
+ {
35
+ "epoch": 0.69,
36
+ "learning_rate": 0.0,
37
+ "loss": 1.8206,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.83,
42
+ "learning_rate": 0.0,
43
+ "loss": 1.8477,
44
+ "step": 120
45
+ },
46
+ {
47
+ "epoch": 0.97,
48
+ "learning_rate": 0.0,
49
+ "loss": 1.9184,
50
+ "step": 140
51
+ },
52
+ {
53
+ "epoch": 1.11,
54
+ "learning_rate": 0.0,
55
+ "loss": 1.891,
56
+ "step": 160
57
+ },
58
+ {
59
+ "epoch": 1.25,
60
+ "learning_rate": 0.0,
61
+ "loss": 1.8885,
62
+ "step": 180
63
+ },
64
+ {
65
+ "epoch": 1.39,
66
+ "learning_rate": 0.0,
67
+ "loss": 1.8674,
68
+ "step": 200
69
+ },
70
+ {
71
+ "epoch": 1.39,
72
+ "eval_loss": 1.8871846199035645,
73
+ "eval_runtime": 12064.3322,
74
+ "eval_samples_per_second": 1.108,
75
+ "eval_steps_per_second": 0.139,
76
+ "step": 200
77
+ }
78
+ ],
79
+ "max_steps": 432,
80
+ "num_train_epochs": 3,
81
+ "total_flos": 1.2442809896681472e+16,
82
+ "trial_name": null,
83
+ "trial_params": null
84
+ }
redpj7B-lora-cnn-dailymail-results_10/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9ce8a29df62c824cf1f148326751b1e02fff8468386faf591f14dde44872f0e
3
+ size 4091
redpj7B-lora-cnn-dailymail-results_10/checkpoint-400/adapter_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "/domino/edv/afs-mrmc-data-store-rw/innovation/hf/RedPajama-INCITE-7B-Base",
3
+ "bias": "none",
4
+ "fan_in_fan_out": false,
5
+ "inference_mode": true,
6
+ "init_lora_weights": true,
7
+ "lora_alpha": 16,
8
+ "lora_dropout": 0.05,
9
+ "modules_to_save": null,
10
+ "peft_type": "LORA",
11
+ "r": 8,
12
+ "target_modules": [
13
+ "query_key_value"
14
+ ],
15
+ "task_type": "CAUSAL_LM"
16
+ }
redpj7B-lora-cnn-dailymail-results_10/checkpoint-400/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b47162c9c1217e296a97387e5e0eb5ed2df80aab1b6860fb83c03b0523e15cad
3
+ size 16800753
redpj7B-lora-cnn-dailymail-results_10/checkpoint-400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95f1b2d8af7f3a70cd816a8981e7b02351044a820b84505113ced8f6cc2bd63b
3
+ size 33592261
redpj7B-lora-cnn-dailymail-results_10/checkpoint-400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a857782c970b0078facb7d84667df22e146e80669c1719c902157213fb1b85fb
3
+ size 14575
redpj7B-lora-cnn-dailymail-results_10/checkpoint-400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bba29cc1c2f22d2c1b6368568e7c8561705461a5ae66a0a29aec90d4426b3d7e
3
+ size 627
redpj7B-lora-cnn-dailymail-results_10/checkpoint-400/trainer_state.json ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.8871846199035645,
3
+ "best_model_checkpoint": "/domino/edv/afs-mrmc-data-store-rw/innovation/hf/redpj7B-lora-cnn-dailymail-results_10/checkpoint-200",
4
+ "epoch": 2.7777777777777777,
5
+ "global_step": 400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.14,
12
+ "learning_rate": 0.0,
13
+ "loss": 1.8996,
14
+ "step": 20
15
+ },
16
+ {
17
+ "epoch": 0.28,
18
+ "learning_rate": 0.0,
19
+ "loss": 1.8773,
20
+ "step": 40
21
+ },
22
+ {
23
+ "epoch": 0.42,
24
+ "learning_rate": 0.0,
25
+ "loss": 1.8595,
26
+ "step": 60
27
+ },
28
+ {
29
+ "epoch": 0.56,
30
+ "learning_rate": 0.0,
31
+ "loss": 1.8621,
32
+ "step": 80
33
+ },
34
+ {
35
+ "epoch": 0.69,
36
+ "learning_rate": 0.0,
37
+ "loss": 1.8206,
38
+ "step": 100
39
+ },
40
+ {
41
+ "epoch": 0.83,
42
+ "learning_rate": 0.0,
43
+ "loss": 1.8477,
44
+ "step": 120
45
+ },
46
+ {
47
+ "epoch": 0.97,
48
+ "learning_rate": 0.0,
49
+ "loss": 1.9184,
50
+ "step": 140
51
+ },
52
+ {
53
+ "epoch": 1.11,
54
+ "learning_rate": 0.0,
55
+ "loss": 1.891,
56
+ "step": 160
57
+ },
58
+ {
59
+ "epoch": 1.25,
60
+ "learning_rate": 0.0,
61
+ "loss": 1.8885,
62
+ "step": 180
63
+ },
64
+ {
65
+ "epoch": 1.39,
66
+ "learning_rate": 0.0,
67
+ "loss": 1.8674,
68
+ "step": 200
69
+ },
70
+ {
71
+ "epoch": 1.39,
72
+ "eval_loss": 1.8871846199035645,
73
+ "eval_runtime": 12064.3322,
74
+ "eval_samples_per_second": 1.108,
75
+ "eval_steps_per_second": 0.139,
76
+ "step": 200
77
+ },
78
+ {
79
+ "epoch": 1.53,
80
+ "learning_rate": 0.0,
81
+ "loss": 1.8998,
82
+ "step": 220
83
+ },
84
+ {
85
+ "epoch": 1.67,
86
+ "learning_rate": 0.0,
87
+ "loss": 1.8504,
88
+ "step": 240
89
+ },
90
+ {
91
+ "epoch": 1.81,
92
+ "learning_rate": 0.0,
93
+ "loss": 1.8554,
94
+ "step": 260
95
+ },
96
+ {
97
+ "epoch": 1.94,
98
+ "learning_rate": 0.0,
99
+ "loss": 1.8329,
100
+ "step": 280
101
+ },
102
+ {
103
+ "epoch": 2.08,
104
+ "learning_rate": 0.0,
105
+ "loss": 1.8951,
106
+ "step": 300
107
+ },
108
+ {
109
+ "epoch": 2.22,
110
+ "learning_rate": 0.0,
111
+ "loss": 1.8877,
112
+ "step": 320
113
+ },
114
+ {
115
+ "epoch": 2.36,
116
+ "learning_rate": 0.0,
117
+ "loss": 1.8668,
118
+ "step": 340
119
+ },
120
+ {
121
+ "epoch": 2.5,
122
+ "learning_rate": 0.0,
123
+ "loss": 1.855,
124
+ "step": 360
125
+ },
126
+ {
127
+ "epoch": 2.64,
128
+ "learning_rate": 0.0,
129
+ "loss": 1.8598,
130
+ "step": 380
131
+ },
132
+ {
133
+ "epoch": 2.78,
134
+ "learning_rate": 0.0,
135
+ "loss": 1.8242,
136
+ "step": 400
137
+ },
138
+ {
139
+ "epoch": 2.78,
140
+ "eval_loss": 1.8871846199035645,
141
+ "eval_runtime": 12065.8831,
142
+ "eval_samples_per_second": 1.108,
143
+ "eval_steps_per_second": 0.138,
144
+ "step": 400
145
+ }
146
+ ],
147
+ "max_steps": 432,
148
+ "num_train_epochs": 3,
149
+ "total_flos": 2.4885619793362944e+16,
150
+ "trial_name": null,
151
+ "trial_params": null
152
+ }
redpj7B-lora-cnn-dailymail-results_10/checkpoint-400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9ce8a29df62c824cf1f148326751b1e02fff8468386faf591f14dde44872f0e
3
+ size 4091