Text Generation · Transformers · PyTorch · English · llama · text-generation-inference · Inference Endpoints
winglian committed on
Commit
1aa55d6
1 Parent(s): 5111db7

commit 1st epoch(0.96) of training with wizardlm data

README.md CHANGED
@@ -14,7 +14,8 @@ pipeline_tag: text-generation
 
 - `vicgalle/alpaca-gpt4` 1 epoch, learning rate 3e-5 https://wandb.ai/wing-lian/wizard-vicuna-gpt4/overview
 - `deepspeed scripts/finetune.py configs/axolotl/wizard-vicuna-13b-step1.yml --deepspeed configs/ds_config.json --num_epochs 2 --warmup_steps 46 --logging_steps 1 --save_steps 23`
-- `wizardlm` TBD
+- `wizardlm` https://wandb.ai/wing-lian/wizard-vicuna-gpt4/runs/4y38knw4
+- `deepspeed scripts/finetune.py configs/axolotl/wizard-vicuna-13b-step2.yml --deepspeed configs/ds_config-step2.json --num_epochs 2 --logging_steps 1`
 - `vicuna` TBD
 
-<pre>Brought to you by the Freedom AI Collective</pre>
+<pre>Brought to you by the Freedom AI Collective</pre>
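For a quick sanity check of the weights produced by this step, a short generation pass with 🤗 Transformers is usually enough. The sketch below is illustrative only and not part of this commit; the checkpoint path and the Alpaca-style prompt are assumptions taken from `output_dir` and the `type: alpaca` dataset entry in the config added below.

```python
# Minimal sketch: load the step-2 checkpoint and generate a short completion.
# `model_path` is a placeholder; point it at the trained output directory
# (./wizard-lm-out in the YAML below) or at the Hub repo for this model.
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer

model_path = "./wizard-lm-out"  # assumed local path, not guaranteed by this commit
tokenizer = LlamaTokenizer.from_pretrained(model_path)
model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.bfloat16, device_map="auto"  # device_map needs accelerate
)

# Alpaca-style prompt, assumed from the `type: alpaca` dataset entry.
prompt = "### Instruction:\nSummarize what evol-instruct data is.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=128, do_sample=True, temperature=0.7)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```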
configs/axolotl/wizard-vicuna-13b-step2.yml ADDED
@@ -0,0 +1,78 @@
+# base_model: huggyllama/llama-13b
+# base_model_config: huggyllama/llama-13b
+base_model: /workspace/llama-13b-alpaca-wizard-vicuna/
+base_model_config: huggyllama/llama-13b
+model_type: LlamaForCausalLM
+tokenizer_type: LlamaTokenizer
+load_in_8bit: false
+datasets:
+  # - path: vicgalle/alpaca-gpt4
+  #   type: alpaca
+  # - path: anon8231489123/ShareGPT_Vicuna_unfiltered
+  #   data_files: ShareGPT_V3_unfiltered_cleaned_split_no_imsorry.json
+  #   type: sharegpt
+  - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
+    type: alpaca
+dataset_prepared_path: data/last_run_prepared
+val_set_size: 0.04
+adapter:
+lora_model_dir:
+sequence_len: 2048
+max_packed_sequence_len: 2048
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+  - q_proj
+  - v_proj
+  # - k_proj
+  # - o_proj
+lora_fan_in_fan_out: false
+wandb_project:
+wandb_watch:
+wandb_run_id:
+wandb_log_model: checkpoint
+output_dir: ./wizard-lm-out
+batch_size: 128
+micro_batch_size: 1
+num_epochs: 2
+warmup_steps: 117
+logging_steps:
+learning_rate: 0.000003
+optimizer: adamw_torch
+torchdistx_path:
+lr_scheduler: one_cycle
+log_sweep_min_lr: 2e-6
+log_sweep_max_lr: 1e-4
+train_on_inputs: false
+group_by_length: false
+bf16: true
+tf32: true
+gradient_checkpointing:
+early_stopping_patience:
+resume_from_checkpoint:
+auto_resume_from_checkpoints:
+local_rank:
+load_4bit:
+xformers_attention:
+flash_attention: true
+gptq_groupsize:
+gptq_model_v1:
+save_steps: 56
+eval_steps: 14
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+fsdp_transformer_layer_cls_to_wrap:
+fsdp_min_num_params: 2000
+fsdp_backward_prefetch:
+  - backward_pre
+limit_all_gathers: false
+special_tokens:
+  pad_token: "[PAD]"
+  bos_token: "<s>"
+  eos_token: "</s>"
+  unk_token: "<unk>"
+
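Outside of axolotl, the `special_tokens` block above corresponds to a standard tokenizer update plus an embedding resize (needed because `[PAD]` is new to the LLaMA vocabulary). A minimal sketch, assuming the base model named in the config:

```python
# Sketch: apply the special_tokens section of the YAML by hand.
from transformers import LlamaForCausalLM, LlamaTokenizer

base = "huggyllama/llama-13b"  # base_model_config from the YAML
tokenizer = LlamaTokenizer.from_pretrained(base)
model = LlamaForCausalLM.from_pretrained(base)

# Mirror the special_tokens mapping from the config.
tokenizer.add_special_tokens({
    "pad_token": "[PAD]",
    "bos_token": "<s>",
    "eos_token": "</s>",
    "unk_token": "<unk>",
})
# Grow the embedding matrix so the new [PAD] id has a row.
model.resize_token_embeddings(len(tokenizer))
```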
configs/ds_config-step2.json ADDED
@@ -0,0 +1,58 @@
+{
+  "zero_optimization": {
+    "stage": 3,
+    "offload_optimizer": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "offload_param": {
+      "device": "cpu",
+      "pin_memory": true
+    },
+    "overlap_comm": true,
+    "contiguous_gradients": true,
+    "sub_group_size": 0,
+    "reduce_bucket_size": "auto",
+    "stage3_prefetch_bucket_size": "auto",
+    "stage3_param_persistence_threshold": "auto",
+    "stage3_max_live_parameters": 0,
+    "stage3_max_reuse_distance": 0,
+    "stage3_gather_16bit_weights_on_model_save": true
+  },
+  "bf16": {
+    "enabled": "auto"
+  },
+  "fp16": {
+    "enabled": "auto",
+    "auto_cast": false,
+    "loss_scale": 0,
+    "initial_scale_power": 32,
+    "loss_scale_window": 1000,
+    "hysteresis": 2,
+    "min_loss_scale": 1
+  },
+  "optimizer": {
+    "type": "AdamW",
+    "params": {
+      "lr": "auto",
+      "betas": [
+        0.9,
+        0.999
+      ],
+      "eps": 1e-8,
+      "weight_decay": 0
+    }
+  },
+  "scheduler": {
+    "type": "OneCycle",
+    "params": {
+      "cycle_min_lr": 0.0000003,
+      "cycle_max_lr": 0.000003,
+      "cycle_first_step_size": 117
+    }
+  },
+  "train_batch_size": "auto",
+  "train_micro_batch_size_per_gpu": "auto",
+  "wall_clock_breakdown": false
+}
+
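This is a ZeRO stage-3 setup with optimizer state and parameters offloaded to CPU, bf16/fp16 left on `"auto"`, and a OneCycle schedule sweeping from 3e-7 to 3e-6 over the first 117 steps, matching `warmup_steps: 117` and `learning_rate: 0.000003` in the YAML. When launched through the `deepspeed` command in the README, the 🤗 Trainer integration fills the `"auto"` fields from its own arguments; a rough sketch of that wiring (argument values are illustrative, the real entry point is `scripts/finetune.py`):

```python
# Sketch: how a DeepSpeed JSON like ds_config-step2.json is typically handed
# to the Hugging Face Trainer; "auto" fields resolve from these arguments.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./wizard-lm-out",    # output_dir in the YAML
    per_device_train_batch_size=1,   # micro_batch_size: 1
    num_train_epochs=2,              # num_epochs: 2
    learning_rate=3e-6,              # learning_rate: 0.000003
    bf16=True,                       # resolves "bf16": {"enabled": "auto"}
    tf32=True,
    deepspeed="configs/ds_config-step2.json",
)
```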
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:889db4f7bfe042df8a8a31be88256992bfb30eece88dca60fadaa83810bf7b13
+oid sha256:1a0347a171523a89c7faa94c48cdc17284a8dedeb2505c0b1bc4bf7189e33b26
 size 26031868013
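Only the Git LFS pointer changes in this commit: the checkpoint keeps the same size (26,031,868,013 bytes) but gets a new content hash. A small sketch for confirming that a locally downloaded `pytorch_model.bin` matches this revision (the local path is an assumption):

```python
# Sketch: verify a downloaded pytorch_model.bin against the LFS oid above.
import hashlib
import os

EXPECTED_SHA256 = "1a0347a171523a89c7faa94c48cdc17284a8dedeb2505c0b1bc4bf7189e33b26"
EXPECTED_SIZE = 26031868013

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so the ~26 GB checkpoint never sits in RAM."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

path = "pytorch_model.bin"  # assumed local download location
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
assert sha256_of(path) == EXPECTED_SHA256, "sha256 mismatch"
print("checkpoint matches this commit")
```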