OpenMOSE
/

RWKV-x070-2B9-CJE-Instruct

Model card Files Files and versions Community

OpenMOSE committed on 1 day ago

Commit

7eef6fc

·

verified ·

1 Parent(s): 41d5365

Training Script and LayerProfile

Files changed (2) hide show

32_TEST_bone_2b9_mytest.csv +35 -0
step-2-train-sft-x070.sh +25 -0

32_TEST_bone_2b9_mytest.csv ADDED Viewed

	@@ -0,0 +1,35 @@

+Layer,Mode,Rank,Alpha,Dropout,Weight_lr_init,Weight_lr_final,Weight_decay,State_lr_init,State_lr_final,RejectParts
+emb,freeze,0,0,0.01,0.000001,0.0000001,0.01,0.05,0.01,
+0,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+1,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+2,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+3,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+4,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+5,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+6,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+7,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+8,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+9,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+10,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+11,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+12,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+13,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+14,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+15,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+16,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+17,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+18,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+19,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+20,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+21,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+22,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+23,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+24,full,1280,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+25,full,1280,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+26,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+27,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+28,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+29,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+30,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+31,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
+head,full,512,32,0.01,0.00001,0.000001,0.01,0.05,0.01,

step-2-train-sft-x070.sh ADDED Viewed

	@@ -0,0 +1,25 @@

+python train.py --load_model "myfolder/models/rwkv-x070-2b9-world-v3-40_trained-20250113-ctx4k.pth" \
+ --wandb "RWKV-LM-RLHF x070-2b9 General JPENCN v3" \
+ --proj_dir "myfolder/Outputs/x070GeneralJPENCNv3" \
+ --state 0 \
+ --infctx 0 \
+ --vocab_size 65536 --ctx_len 5120 \
+ --epoch_steps 2000 --epoch_count 200 --epoch_begin 0 --epoch_save 1 \
+ --micro_bsz 3 --n_layer 32 --n_embd 2560 \
+ --lr_init 1e-5 --lr_final 1e-6 \
+ --warmup_steps 100 --beta1 0.9 --beta2 0.999 --adam_eps 1e-8 \
+ --accelerator gpu --devices 2 --precision 'bf16' \
+ --grad_cp 1 --my_testing "x070" \
+ --strategy deepspeed_stage_2_offload \
+ --layer_profile 'layerprofile/32_TEST_bone_2b9_mytest.csv' \
+ --quant 0 \
+ --quant_mode 'nf4' \
+ --gpu_arch 'rocm' \
+ --limited_lora 0 \
+ --sft 1 \
+ --smoothing 0.001 \
+ --random_mode 1 \
+ --optim '' \
+ --train_data_file 'myfolder/datasets/General-jpencnv3.h5' \
+ --infctx_dataset_multiplier 8 \
+ --accumulate_grad_batches 16