MaziyarPanahi committed on
Commit
b7f22bd
1 Parent(s): 913558b

Delete .ipynb_checkpoints

Browse files
.ipynb_checkpoints/README-checkpoint.md DELETED
@@ -1,152 +0,0 @@
1
- ---
2
- library_name: peft
3
- tags:
4
- - axolotl
5
- - generated_from_trainer
6
- base_model: MaziyarPanahi/Qwen1.5-8x7b
7
- model-index:
8
- - name: Qwen1.5-8x7b-v0.1
9
- results: []
10
- ---
11
-
12
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
13
- should probably proofread and complete it, then remove this comment. -->
14
-
15
- [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
16
- <details><summary>See axolotl config</summary>
17
-
18
- axolotl version: `0.4.0`
19
- ```yaml
20
- base_model: MaziyarPanahi/Qwen1.5-8x7b
21
- model_type: Qwen2ForCausalLM
22
- tokenizer_type: Qwen2Tokenizer
23
-
24
- trust_remote_code: true
25
-
26
- hub_model_id: MaziyarPanahi/Qwen1.5-8x7b-v0.1
27
- hf_use_auth_token: true
28
-
29
- load_in_8bit: false
30
- load_in_4bit: true
31
- strict: false
32
-
33
-
34
- datasets:
35
- - path: Crystalcareai/MoD-150k
36
- type: sharegpt
37
-
38
-
39
- dataset_prepared_path:
40
- val_set_size: 0.05
41
- output_dir: ./Qwen1.5-8x7b-v0.1-lora-out
42
-
43
- model_config:
44
- output_router_logits: true
45
-
46
- adapter: qlora
47
- lora_model_dir:
48
- sequence_len: 2048
49
- sample_packing: true
50
- pad_to_sequence_len: true
51
-
52
-
53
- lora_r: 32
54
- lora_alpha: 16
55
- lora_dropout: 0.05
56
- lora_target_linear: true
57
- lora_fan_in_fan_out:
58
-
59
-
60
- gradient_accumulation_steps: 2
61
- micro_batch_size: 2
62
- num_epochs: 1
63
- optimizer: adamw_bnb_8bit
64
- lr_scheduler: cosine
65
- learning_rate: 0.0002
66
-
67
-
68
- train_on_inputs: false
69
- group_by_length: false
70
- bf16: auto
71
- fp16:
72
- tf32: false
73
-
74
-
75
- gradient_checkpointing: true
76
- early_stopping_patience:
77
- resume_from_checkpoint:
78
- local_rank:
79
- logging_steps: 1
80
- xformers_attention:
81
- flash_attention: true
82
-
83
-
84
- warmup_steps: 10
85
- evals_per_epoch: 4
86
- eval_table_size:
87
- eval_max_new_tokens: 128
88
- saves_per_epoch: 1
89
- debug:
90
- deepspeed:
91
- weight_decay: 0.0
92
- fsdp:
93
- fsdp_config:
94
- special_tokens:
95
- ```
96
-
97
- </details><br>
98
-
99
- # Qwen1.5-8x7b-v0.1
100
-
101
- This model is a fine-tuned version of [MaziyarPanahi/Qwen1.5-8x7b](https://huggingface.co/MaziyarPanahi/Qwen1.5-8x7b) on the Crystalcareai/MoD-150k dataset.
102
- It achieves the following results on the evaluation set:
103
- - Loss: 0.7945
104
-
105
- ## Model description
106
-
107
- More information needed
108
-
109
- ## Intended uses & limitations
110
-
111
- More information needed
112
-
113
- ## Training and evaluation data
114
-
115
- More information needed
116
-
117
- ## Training procedure
118
-
119
- ### Training hyperparameters
120
-
121
- The following hyperparameters were used during training:
122
- - learning_rate: 0.0002
123
- - train_batch_size: 2
124
- - eval_batch_size: 2
125
- - seed: 42
126
- - distributed_type: multi-GPU
127
- - num_devices: 4
128
- - gradient_accumulation_steps: 2
129
- - total_train_batch_size: 16
130
- - total_eval_batch_size: 8
131
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
132
- - lr_scheduler_type: cosine
133
- - lr_scheduler_warmup_steps: 10
134
- - num_epochs: 1
135
-
136
- ### Training results
137
-
138
- | Training Loss | Epoch | Step | Validation Loss |
139
- |:-------------:|:-----:|:----:|:---------------:|
140
- | 6.2196 | 0.0 | 1 | 6.1942 |
141
- | 0.7772 | 0.25 | 513 | 0.8037 |
142
- | 0.656 | 0.5 | 1026 | 0.7977 |
143
- | 0.6967 | 0.75 | 1539 | 0.7945 |
144
-
145
-
146
- ### Framework versions
147
-
148
- - PEFT 0.8.2
149
- - Transformers 4.39.0.dev0
150
- - Pytorch 2.2.0+cu121
151
- - Datasets 2.17.0
152
- - Tokenizers 0.15.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/config-checkpoint.json DELETED
@@ -1,32 +0,0 @@
1
- {
2
- "_name_or_path": "MaziyarPanahi/Qwen1.5-8x7b",
3
- "architectures": [
4
- "MixtralForCausalLM"
5
- ],
6
- "attention_dropout": 0.0,
7
- "bos_token_id": null,
8
- "eos_token_id": 151643,
9
- "hidden_act": "silu",
10
- "hidden_size": 4096,
11
- "initializer_range": 0.02,
12
- "intermediate_size": 11008,
13
- "max_position_embeddings": 32768,
14
- "max_window_layers": 28,
15
- "model_type": "mixtral",
16
- "num_attention_heads": 32,
17
- "num_experts_per_tok": 2,
18
- "num_hidden_layers": 32,
19
- "num_key_value_heads": 32,
20
- "num_local_experts": 8,
21
- "output_router_logits": true,
22
- "rms_norm_eps": 1e-06,
23
- "rope_theta": 1000000.0,
24
- "router_aux_loss_coef": 0.001,
25
- "sliding_window": null,
26
- "tie_word_embeddings": false,
27
- "torch_dtype": "bfloat16",
28
- "transformers_version": "4.39.0.dev0",
29
- "use_cache": false,
30
- "use_sliding_window": false,
31
- "vocab_size": 151936
32
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.ipynb_checkpoints/generation_config-checkpoint.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "_from_model_config": true,
3
- "do_sample": true,
4
- "eos_token_id": 151643,
5
- "transformers_version": "4.39.0.dev0",
6
- "use_cache": false
7
- }
 
 
 
 
 
 
 
 
.ipynb_checkpoints/tokenizer_config-checkpoint.json DELETED
@@ -1,44 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "151643": {
5
- "content": "<|endoftext|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "151644": {
13
- "content": "<|im_start|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "151645": {
21
- "content": "<|im_end|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- }
28
- },
29
- "additional_special_tokens": [
30
- "<|im_start|>",
31
- "<|im_end|>"
32
- ],
33
- "bos_token": null,
34
- "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
- "clean_up_tokenization_spaces": false,
36
- "eos_token": "<|endoftext|>",
37
- "errors": "replace",
38
- "model_max_length": 32768,
39
- "pad_token": "<|endoftext|>",
40
- "split_special_tokens": false,
41
- "tokenizer_class": "Qwen2Tokenizer",
42
- "unk_token": null,
43
- "use_fast": true
44
- }