atsuki-yamaguchi committed on
Commit 63eb15d
1 Parent(s): fcfa1ea

Upload folder using huggingface_hub

README.md CHANGED
@@ -1,35 +1,21 @@
  ---
- license: mit
- language:
- - ja
+ library_name: peft
  ---
- Mistral-7B LAPT + CLP+ Japanese
- ===
+ ## Training procedure

- ## How to use
- ```python
- from peft import AutoPeftModelForCausalLM
- from transformers import AutoTokenizer
-
- model = AutoPeftModelForCausalLM.from_pretrained(
- "atsuki-yamaguchi/Mistral-7B-v0.1-clpp-ja"
- )
- tokenizer = AutoTokenizer.from_pretrained(
- "atsuki-yamaguchi/Mistral-7B-v0.1-clpp-ja"
- )
- ```
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: True
+ - load_in_4bit: False
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: fp4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float32

- ## Citation
- ```
- @article{yamaguchi2024empirical,
- title={An Empirical Study on Cross-lingual Vocabulary Adaptation for Efficient Generative {LLM} Inference},
- author={Atsuki Yamaguchi and Aline Villavicencio and Nikolaos Aletras},
- journal={ArXiv},
- year={2024},
- volume={abs/2402.10712},
- url={https://arxiv.org/abs/2402.10712}
- }
- ```
+ ### Framework versions

- ## Link
- For more details, please visit https://github.com/gucci-j/llm-cva
+ - PEFT 0.5.0
 
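The new README replaces the usage snippet with the training-time quantization settings. For reference, a minimal sketch (not the authors' training script) of how the listed values map onto a `transformers.BitsAndBytesConfig` when loading the base model; the base checkpoint name `mistralai/Mistral-7B-v0.1` is taken from the config.json change further down:

```python
# Sketch only: rebuilds the bitsandbytes config listed in the new README
# and loads the base model with it.
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,                       # load_in_8bit: True
    load_in_4bit=False,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float32,
)

# Base checkpoint name taken from the config.json diff below; adjust as needed.
base_model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-v0.1",
    quantization_config=bnb_config,
)
```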
adapter_config.json CHANGED
@@ -1,29 +1 @@
- {
- "auto_mapping": null,
- "base_model_name_or_path": "atsuki-yamaguchi/Mistral-7B-v0.1-clpp-ja",
- "bias": "none",
- "fan_in_fan_out": false,
- "inference_mode": true,
- "init_lora_weights": true,
- "layers_pattern": null,
- "layers_to_transform": null,
- "lora_alpha": 32,
- "lora_dropout": 0.05,
- "modules_to_save": [
- "lm_head",
- "embed_tokens"
- ],
- "peft_type": "LORA",
- "r": 8,
- "revision": null,
- "target_modules": [
- "q_proj",
- "v_proj",
- "k_proj",
- "o_proj",
- "gate_proj",
- "down_proj",
- "up_proj"
- ],
- "task_type": "CAUSAL_LM"
- }
+ {"auto_mapping": null, "base_model_name_or_path": "atsuki-yamaguchi/Mistral-7B-v0.1-clpp-ja", "bias": "none", "fan_in_fan_out": false, "inference_mode": true, "init_lora_weights": true, "layers_pattern": null, "layers_to_transform": null, "lora_alpha": 32, "lora_dropout": 0.05, "modules_to_save": ["lm_head", "embed_tokens"], "peft_type": "LORA", "r": 8, "revision": null, "target_modules": ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "down_proj", "up_proj"], "task_type": "CAUSAL_LM"}
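The adapter file is re-serialized onto one line; the fields themselves are unchanged. As a hedged sketch, the same settings expressed as a `peft.LoraConfig` (field values copied from the JSON above, not from a published training script):

```python
# Sketch only: the LoRA configuration implied by adapter_config.json.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "v_proj", "k_proj", "o_proj",
        "gate_proj", "down_proj", "up_proj",
    ],
    # lm_head and embed_tokens are trained and saved in full rather than
    # adapted with low-rank matrices.
    modules_to_save=["lm_head", "embed_tokens"],
)
```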
config.json CHANGED
@@ -1,5 +1,5 @@
  {
- "_name_or_path": "/mnt/parscratch/users/acp23ay/private/models/Mistral-7B-v0.1-ja-clp-plus",
+ "_name_or_path": "mistralai/Mistral-7B-v0.1",
  "architectures": [
  "MistralForCausalLM"
  ],
@@ -18,7 +18,7 @@
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
- "torch_dtype": "float32",
+ "torch_dtype": "float64",
  "transformers_version": "4.35.0.dev0",
  "use_cache": true,
  "vocab_size": 32000
model-00001-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e436861c8cf471e17cc9bb08daf046a00f98d710f91ce68c74fd70e28cb94747
- size 4987196936
+ oid sha256:d6deef5ea687d29e730b7861e369102168c3929aa1174631bc4c9ec8ad5095fd
+ size 4941026032
model-00002-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:539496de5ecdf05ff99768d9dfbfde25838361eb4a3214c9ca70138ca46d92da
- size 4899116440
+ oid sha256:9d32f4e850f6bda3b1aa1fd85a372377274b9a8a166dab9047fd4cd958199746
+ size 4999813072
model-00003-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:686283795d1b7ec495bdfdff1029d1c240da2213c6cce65d32d2f76de8ce54b3
- size 4999813120
+ oid sha256:8f3b1794261ba1e27810d553cd2b5f949fcc3a6fe37ba9b09f5033bc7e5de472
+ size 4832007496
model-00004-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:73c71ef152c6b70d2114c3e6348df73d7f70fa5a020267f9ddb9163fb0dad519
- size 4999813128
+ oid sha256:f72da02290a5f95304a8cff4c7216f30405a3f0072ac1b2b1a906e9748ff10a7
+ size 4999813120
model-00005-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a4feb6b67528bcad53df28e1771317e0796f663767c8f1b1fc56b270c238d464
- size 4832007496
+ oid sha256:f289adb9a536fe1aed9ba102e26c6b07185a467cb77def237d8715d378831662
+ size 4999813128
model-00006-of-00006.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:94286ed0b0dcf9c76517e52fea70500a901795b4ed0667b2e0060431c1aa76f6
- size 4249014896
+ oid sha256:be962dec7dca8150b39d0a05b00e8181a30446d8510bbdd1fb5b96b8d4017cd4
+ size 4718777176
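Each `.safetensors` entry above is a Git LFS pointer (spec v1) recording the blob's `oid sha256` and `size`. A purely illustrative check of a downloaded shard against its new pointer (values copied from shard 1 above):

```python
# Illustrative only: verify a downloaded shard matches its LFS pointer.
import hashlib
import os

EXPECTED_SHA256 = "d6deef5ea687d29e730b7861e369102168c3929aa1174631bc4c9ec8ad5095fd"
EXPECTED_SIZE = 4941026032
PATH = "model-00001-of-00006.safetensors"

sha = hashlib.sha256()
with open(PATH, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

assert os.path.getsize(PATH) == EXPECTED_SIZE, "size mismatch"
assert sha.hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("shard matches its LFS pointer")
```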
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 28966928384
+ "total_size": 29491216384
  },
  "weight_map": {
  "lm_head.weight": "model-00006-of-00006.safetensors",
@@ -25,8 +25,8 @@
  "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
  "model.layers.10.input_layernorm.weight": "model-00003-of-00006.safetensors",
  "model.layers.10.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
- "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
- "model.layers.10.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.10.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
  "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
  "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
  "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
@@ -68,24 +68,24 @@
  "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
  "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
  "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
- "model.layers.15.input_layernorm.weight": "model-00003-of-00006.safetensors",
- "model.layers.15.mlp.down_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.15.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
  "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
  "model.layers.15.mlp.up_proj.weight": "model-00003-of-00006.safetensors",
- "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
  "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
  "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
  "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
  "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
  "model.layers.16.input_layernorm.weight": "model-00004-of-00006.safetensors",
  "model.layers.16.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
- "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.16.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
  "model.layers.16.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
  "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
- "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
- "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
- "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
- "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
  "model.layers.17.input_layernorm.weight": "model-00004-of-00006.safetensors",
  "model.layers.17.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
  "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
@@ -131,11 +131,11 @@
  "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
  "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
  "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
- "model.layers.21.input_layernorm.weight": "model-00004-of-00006.safetensors",
- "model.layers.21.mlp.down_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00005-of-00006.safetensors",
+ "model.layers.21.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
  "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00006.safetensors",
- "model.layers.21.mlp.up_proj.weight": "model-00004-of-00006.safetensors",
- "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
+ "model.layers.21.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
  "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
  "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
  "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
@@ -145,10 +145,10 @@
  "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
  "model.layers.22.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
  "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
- "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
- "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
- "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
- "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
  "model.layers.23.input_layernorm.weight": "model-00005-of-00006.safetensors",
  "model.layers.23.mlp.down_proj.weight": "model-00005-of-00006.safetensors",
  "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
@@ -187,8 +187,8 @@
  "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
  "model.layers.27.input_layernorm.weight": "model-00006-of-00006.safetensors",
  "model.layers.27.mlp.down_proj.weight": "model-00006-of-00006.safetensors",
- "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00006.safetensors",
- "model.layers.27.mlp.up_proj.weight": "model-00005-of-00006.safetensors",
+ "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
+ "model.layers.27.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
  "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
  "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
  "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00006.safetensors",
@@ -239,11 +239,11 @@
  "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00006.safetensors",
  "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
  "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
- "model.layers.4.input_layernorm.weight": "model-00001-of-00006.safetensors",
- "model.layers.4.mlp.down_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00002-of-00006.safetensors",
+ "model.layers.4.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
  "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00006.safetensors",
- "model.layers.4.mlp.up_proj.weight": "model-00001-of-00006.safetensors",
- "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00006.safetensors",
+ "model.layers.4.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
  "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
  "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00006.safetensors",
  "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
@@ -253,10 +253,10 @@
  "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
  "model.layers.5.mlp.up_proj.weight": "model-00002-of-00006.safetensors",
  "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
- "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
  "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00006.safetensors",
- "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00006.safetensors",
- "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00006.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
  "model.layers.6.input_layernorm.weight": "model-00002-of-00006.safetensors",
  "model.layers.6.mlp.down_proj.weight": "model-00002-of-00006.safetensors",
  "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00006.safetensors",
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7bf9cc3eb296729d77de6918b554460f2e0b5386fda248f222c235e779193b55
+ size 567855324
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f21655ba8bdde9d44ecb6be12e1fe5543c4cdb67e0e5a746b9dcacce9091f703
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:394f63e155c1870e1adf89d6e808adbedc007a65149566630f394b538ee37fe7
+ size 1064
special_tokens_map.json CHANGED
@@ -1,18 +1,6 @@
  {
- "bos_token": {
- "content": "<s>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
- "cls_token": {
- "content": "[CLS]",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
+ "bos_token": "<s>",
+ "cls_token": "[CLS]",
  "eos_token": {
  "content": "</s>",
  "lstrip": false,
@@ -20,13 +8,7 @@
  "rstrip": false,
  "single_word": false
  },
- "mask_token": {
- "content": "[MASK]",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
+ "mask_token": "[MASK]",
  "pad_token": {
  "content": "[PAD]",
  "lstrip": false,
@@ -34,13 +16,7 @@
  "rstrip": false,
  "single_word": false
  },
- "sep_token": {
- "content": "[SEP]",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false
- },
+ "sep_token": "[SEP]",
  "unk_token": {
  "content": "[UNK]",
  "lstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff

trainer_state.json ADDED
The diff for this file is too large to render. See raw diff

training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:68ca55416176e6a638fb88f1e1e385f0b51da4a42d9ff8b55573866968fc2805
+ size 4664