samoline committed · Commit d2dcb39 (verified) · 1 Parent(s): 06dee4b

Training in progress, step 3
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "Qwen/Qwen2-0.5B",
+  "base_model_name_or_path": "Maykeye/TinyLLama-v0",
   "bias": "none",
   "fan_in_fan_out": null,
   "inference_mode": true,
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "gate_proj",
-    "k_proj",
     "down_proj",
+    "v_proj",
+    "gate_proj",
     "up_proj",
+    "k_proj",
     "o_proj",
-    "v_proj"
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:125ef983ebe8d657fc93ae86b07870a7f915de42c571715a48902541b4ff9979
-size 17640136
+oid sha256:3ca851f8112f8a40cc892b93e96ab925834bd344ae89509f2f056e26f419aa69
+size 390888
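
Both sides of this diff are Git LFS pointer files, not the weights themselves; the drop from 17,640,136 to 390,888 bytes reflects the far smaller base model. A sketch for inspecting the real file once it has been fetched with git lfs pull:

from safetensors.torch import load_file

# Assumes the actual weights were fetched (git lfs pull); the pointer
# file alone cannot be loaded.
state = load_file("adapter_model.safetensors")
total = sum(t.numel() for t in state.values())
print(f"{total:,} adapter parameters across {len(state)} tensors")
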
config.json CHANGED
@@ -1,29 +1,32 @@
 {
   "_attn_implementation_autoset": true,
-  "_name_or_path": "Qwen/Qwen2-0.5B",
+  "_name_or_path": "Maykeye/TinyLLama-v0",
   "architectures": [
-    "Qwen2ForCausalLM"
+    "LlamaForCausalLM"
   ],
+  "attention_bias": false,
   "attention_dropout": 0.0,
-  "eos_token_id": 151643,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 4,
   "hidden_act": "silu",
-  "hidden_size": 896,
+  "hidden_size": 64,
   "initializer_range": 0.02,
-  "intermediate_size": 4864,
-  "max_position_embeddings": 131072,
-  "max_window_layers": 24,
-  "model_type": "qwen2",
-  "num_attention_heads": 14,
-  "num_hidden_layers": 24,
-  "num_key_value_heads": 2,
+  "intermediate_size": 256,
+  "max_position_embeddings": 2048,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 8,
+  "num_key_value_heads": 16,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
   "rms_norm_eps": 1e-06,
   "rope_scaling": null,
-  "rope_theta": 1000000.0,
-  "sliding_window": null,
-  "tie_word_embeddings": true,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.46.0",
   "use_cache": false,
-  "use_sliding_window": false,
-  "vocab_size": 151936
+  "vocab_size": 32000
 }
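
The new config is internally consistent: hidden_size 64 / num_attention_heads 16 = head_dim 4, matching the explicit "head_dim": 4, and num_key_value_heads equals num_attention_heads, i.e. plain multi-head attention (the old Qwen2 config used grouped-query attention with 14 query heads and 2 KV heads). A quick sanity check, assuming the hub config matches the one committed here:

from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("Maykeye/TinyLLama-v0")
assert cfg.hidden_size // cfg.num_attention_heads == 4   # head_dim
assert cfg.num_key_value_heads == cfg.num_attention_heads  # no GQA
print(cfg.model_type, cfg.vocab_size)  # llama 32000
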
special_tokens_map.json CHANGED
@@ -1,20 +1,30 @@
 {
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ],
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
   "eos_token": {
-    "content": "<|endoftext|>",
+    "content": "</s>",
     "lstrip": false,
-    "normalized": false,
+    "normalized": true,
     "rstrip": false,
     "single_word": false
   },
   "pad_token": {
-    "content": "<|endoftext|>",
+    "content": "</s>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
   }
 }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bcfe42da0a4497e8b2b172c1f9f4ec423a46dc12907f4349c55025f670422ba9
-size 11418266
+oid sha256:008509a864db476646817f12b72e075f4d3bda417117a3a44394578e5c2a8e1c
+size 3864466
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab1b681ec7fc02fed5edd3026687d7a692a918c4dd8e150ca2e3994a6229843b
+size 534194
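
tokenizer.model appears because LlamaTokenizer is backed by a SentencePiece model, which Qwen2's BPE tokenizer (stored entirely in tokenizer.json) did not need. A sketch for inspecting it directly, assuming the LFS object has been fetched and the sentencepiece package is installed:

import sentencepiece as spm

# Assumes tokenizer.model was fetched from LFS, not just the pointer.
sp = spm.SentencePieceProcessor(model_file="tokenizer.model")
print(sp.vocab_size())           # expected 32000, matching config.json
print(sp.encode("hello world"))  # ids under the Llama SentencePiece vocab
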
tokenizer_config.json CHANGED
@@ -1,24 +1,26 @@
 {
-  "add_prefix_space": false,
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
-    "151643": {
-      "content": "<|endoftext|>",
+    "0": {
+      "content": "<unk>",
       "lstrip": false,
-      "normalized": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "151644": {
-      "content": "<|im_start|>",
+    "1": {
+      "content": "<s>",
       "lstrip": false,
-      "normalized": false,
+      "normalized": true,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "151645": {
-      "content": "<|im_end|>",
+    "2": {
+      "content": "</s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -26,18 +28,15 @@
       "special": true
     }
   },
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ],
-  "bos_token": null,
+  "bos_token": "<s>",
   "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
-  "errors": "replace",
-  "model_max_length": 32768,
-  "pad_token": "<|endoftext|>",
-  "split_special_tokens": false,
-  "tokenizer_class": "Qwen2Tokenizer",
-  "unk_token": null
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 2048,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }
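
The chat_template line is unchanged context: both versions carry the same Llama-3-style template, whose <|start_header_id|> and <|eot_id|> markers do not appear among the special tokens shown in this diff. A sketch of rendering it, where "./checkpoint-3" is a hypothetical local path to this checkpoint:

from transformers import AutoTokenizer

# "./checkpoint-3" is a hypothetical local path to this checkpoint;
# apply_chat_template renders the Jinja template from tokenizer_config.json.
tok = AutoTokenizer.from_pretrained("./checkpoint-3")
text = tok.apply_chat_template(
    [{"role": "user", "content": "Hi"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(text)
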
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c819df3701b1b2d9b8598a1b8307b538beb8918319ca83e3eb3e70687d860a64
+oid sha256:5eceebe8114c0c8e8ccec4e3c25275b80e736034181fd8163f052d39aa8f75a3
 size 6776
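
training_args.bin is a pickled TrainingArguments object saved by Trainer, which is why the size stays at 6776 bytes while only the hash changes. A sketch for inspecting it; since this is a pickle, weights_only=False should only be used on files from a trusted source:

import torch

# Unpickles a transformers.TrainingArguments instance; on recent PyTorch,
# weights_only=False is required and is only safe for trusted files.
args = torch.load("training_args.bin", weights_only=False)
print(args.max_steps, args.learning_rate)
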