chansung committed on
Commit e6e5397
1 Parent(s): 2de3751

Model save

README.md ADDED
@@ -0,0 +1,69 @@
+ ---
+ library_name: peft
+ license: llama2
+ base_model: meta-llama/Llama-2-13b-hf
+ tags:
+ - trl
+ - sft
+ - generated_from_trainer
+ datasets:
+ - generator
+ model-index:
+ - name: llama2-13b-lora-classification-11-v1
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # llama2-13b-lora-classification-11-v1
+
+ This model is a fine-tuned version of [meta-llama/Llama-2-13b-hf](https://huggingface.co/meta-llama/Llama-2-13b-hf) on the generator dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 2.0208
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0002
+ - train_batch_size: 16
+ - eval_batch_size: 16
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 256
+ - total_eval_batch_size: 128
+ - optimizer: adamw_torch with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 1
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 1.8348 | 1.0 | 72 | 2.0208 |
+
+
+ ### Framework versions
+
+ - PEFT 0.13.2
+ - Transformers 4.46.2
+ - Pytorch 2.5.1+cu124
+ - Datasets 3.1.0
+ - Tokenizers 0.20.3
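
For reference, a minimal sketch of loading this adapter for inference with PEFT follows. The repo id is assumed from the `model-index` name above, and the embedding resize reflects the two ChatML tokens added in `added_tokens.json`; treat this as a sketch, not code from this commit.

```python
# Minimal sketch: load the base model, resize embeddings for the added ChatML
# tokens, then attach the LoRA adapter. The adapter repo id below is assumed.
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

adapter_id = "chansung/llama2-13b-lora-classification-11-v1"  # assumed repo id
base_id = "meta-llama/Llama-2-13b-hf"

# The tokenizer saved with the adapter includes <|im_start|>/<|im_end|> (ids 32000/32001).
tokenizer = AutoTokenizer.from_pretrained(adapter_id)

base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16, device_map="auto")
# adapter_config.json has "modules_to_save": null, so embeddings are not stored in
# the adapter; resize the base embedding matrix to cover the two added token ids.
base.resize_token_embeddings(len(tokenizer))

model = PeftModel.from_pretrained(base, adapter_id)
model.eval()
```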
adapter_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "meta-llama/Llama-2-13b-hf",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 16,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 8,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "v_proj",
+     "q_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
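
The config above describes a small LoRA setup: rank-8 adapters on the attention query/value projections only, with scaling `lora_alpha / r = 2`. A hedged reconstruction of the equivalent `peft.LoraConfig` (the training script itself is not part of this commit):

```python
from peft import LoraConfig

# Reconstruction of the adapter_config.json values above; not the original script.
lora_config = LoraConfig(
    r=8,                                   # LoRA rank
    lora_alpha=16,                         # effective scaling alpha / r = 2.0
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],   # attention query/value projections only
    task_type="CAUSAL_LM",
)
```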
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:70b0bfb2699dd7f2843c27faa3c0536c5e07f75b0358a928bc277e0cbc655cb8
+ size 681636888
added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "<|im_end|>": 32001,
+   "<|im_start|>": 32000
+ }
all_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 1.0,
+   "total_flos": 7.280931693081395e+17,
+   "train_loss": 2.938576486375597,
+   "train_runtime": 504.3487,
+   "train_samples": 92634,
+   "train_samples_per_second": 36.431,
+   "train_steps_per_second": 0.143
+ }
runs/Nov22_14-35-18_metallic-vm-falls-fin-02/events.out.tfevents.1732286447.metallic-vm-falls-fin-02.50699.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:338a894fd66b64cb241c3cfdfb19682299b804dc040a5ec796f16ed1fd3c2302
+ size 9356
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "additional_special_tokens": [
+     {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "bos_token": "<|im_start|>",
+   "eos_token": "<|im_end|>",
+   "pad_token": "<|im_end|>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
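
These maps repurpose the ChatML markers as bos/eos/pad on top of the base Llama-2 tokenizer. A sketch of how that setup could be reproduced is below; it reconstructs the likely training-time configuration and is not code from this commit.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-13b-hf")
# Register the ChatML markers and remap bos/eos/pad, mirroring special_tokens_map.json.
tokenizer.add_special_tokens(
    {
        "additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
        "bos_token": "<|im_start|>",
        "eos_token": "<|im_end|>",
        "pad_token": "<|im_end|>",
    }
)
# The new tokens extend the 32000-token base vocabulary, matching added_tokens.json.
print(tokenizer.convert_tokens_to_ids(["<|im_start|>", "<|im_end|>"]))  # [32000, 32001]
```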
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,63 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32000": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32001": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>"
+   ],
+   "bos_token": "<|im_start|>",
+   "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "legacy": false,
+   "model_max_length": 2048,
+   "pad_token": "<|im_end|>",
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
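
The `chat_template` above is plain ChatML. A minimal usage sketch (repo id assumed from the model-index name):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("chansung/llama2-13b-lora-classification-11-v1")  # assumed repo id

messages = [{"role": "user", "content": "Classify: 'The battery died after two days.'"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>user
# Classify: 'The battery died after two days.'<|im_end|>
# <|im_start|>assistant
```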
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+   "epoch": 1.0,
+   "total_flos": 7.280931693081395e+17,
+   "train_loss": 2.938576486375597,
+   "train_runtime": 504.3487,
+   "train_samples": 92634,
+   "train_samples_per_second": 36.431,
+   "train_steps_per_second": 0.143
+ }
trainer_state.json ADDED
@@ -0,0 +1,155 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.0,
+   "eval_steps": 500,
+   "global_step": 72,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.013888888888888888,
+       "grad_norm": 2.69645357131958,
+       "learning_rate": 2.5e-05,
+       "loss": 6.4485,
+       "step": 1
+     },
+     {
+       "epoch": 0.06944444444444445,
+       "grad_norm": 3.235274076461792,
+       "learning_rate": 0.000125,
+       "loss": 6.3994,
+       "step": 5
+     },
+     {
+       "epoch": 0.1388888888888889,
+       "grad_norm": 3.4826488494873047,
+       "learning_rate": 0.0001995184726672197,
+       "loss": 5.6587,
+       "step": 10
+     },
+     {
+       "epoch": 0.20833333333333334,
+       "grad_norm": 4.541069984436035,
+       "learning_rate": 0.00019415440651830208,
+       "loss": 4.6657,
+       "step": 15
+     },
+     {
+       "epoch": 0.2777777777777778,
+       "grad_norm": 4.04740047454834,
+       "learning_rate": 0.00018314696123025454,
+       "loss": 3.7298,
+       "step": 20
+     },
+     {
+       "epoch": 0.3472222222222222,
+       "grad_norm": 1.9904128313064575,
+       "learning_rate": 0.00016715589548470185,
+       "loss": 3.0269,
+       "step": 25
+     },
+     {
+       "epoch": 0.4166666666666667,
+       "grad_norm": 1.2599273920059204,
+       "learning_rate": 0.0001471396736825998,
+       "loss": 2.541,
+       "step": 30
+     },
+     {
+       "epoch": 0.4861111111111111,
+       "grad_norm": 0.848343014717102,
+       "learning_rate": 0.0001242980179903264,
+       "loss": 2.2349,
+       "step": 35
+     },
+     {
+       "epoch": 0.5555555555555556,
+       "grad_norm": 0.7984316349029541,
+       "learning_rate": 0.0001,
+       "loss": 2.0682,
+       "step": 40
+     },
+     {
+       "epoch": 0.625,
+       "grad_norm": 0.6735253930091858,
+       "learning_rate": 7.570198200967362e-05,
+       "loss": 1.9724,
+       "step": 45
+     },
+     {
+       "epoch": 0.6944444444444444,
+       "grad_norm": 0.6062337160110474,
+       "learning_rate": 5.286032631740023e-05,
+       "loss": 1.9033,
+       "step": 50
+     },
+     {
+       "epoch": 0.7638888888888888,
+       "grad_norm": 0.5750789046287537,
+       "learning_rate": 3.2844104515298155e-05,
+       "loss": 1.8667,
+       "step": 55
+     },
+     {
+       "epoch": 0.8333333333333334,
+       "grad_norm": 0.6575501561164856,
+       "learning_rate": 1.6853038769745467e-05,
+       "loss": 1.8513,
+       "step": 60
+     },
+     {
+       "epoch": 0.9027777777777778,
+       "grad_norm": 0.5935372710227966,
+       "learning_rate": 5.8455934816979305e-06,
+       "loss": 1.8311,
+       "step": 65
+     },
+     {
+       "epoch": 0.9722222222222222,
+       "grad_norm": 0.527962327003479,
+       "learning_rate": 4.815273327803182e-07,
+       "loss": 1.8348,
+       "step": 70
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 2.020827293395996,
+       "eval_runtime": 1.1461,
+       "eval_samples_per_second": 3.49,
+       "eval_steps_per_second": 0.873,
+       "step": 72
+     },
+     {
+       "epoch": 1.0,
+       "step": 72,
+       "total_flos": 7.280931693081395e+17,
+       "train_loss": 2.938576486375597,
+       "train_runtime": 504.3487,
+       "train_samples_per_second": 36.431,
+       "train_steps_per_second": 0.143
+     }
+   ],
+   "logging_steps": 5,
+   "max_steps": 72,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 1,
+   "save_steps": 100,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 7.280931693081395e+17,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f81cc204c8e68a5222c2718f077187272a4271c4d41914fe62fdb1b06ac80dd4
+ size 5688