OpenMOSE committed on
Commit
7eef6fc
·
verified ·
1 Parent(s): 41d5365

Training Script and LayerProfile

Browse files
32_TEST_bone_2b9_mytest.csv ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Layer,Mode,Rank,Alpha,Dropout,Weight_lr_init,Weight_lr_final,Weight_decay,State_lr_init,State_lr_final,RejectParts
2
+ emb,freeze,0,0,0.01,0.000001,0.0000001,0.01,0.05,0.01,
3
+ 0,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
4
+ 1,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
5
+ 2,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
6
+ 3,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
7
+ 4,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
8
+ 5,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
9
+ 6,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
10
+ 7,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
11
+ 8,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
12
+ 9,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
13
+ 10,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
14
+ 11,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
15
+ 12,bone,256,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
16
+ 13,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
17
+ 14,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
18
+ 15,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
19
+ 16,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
20
+ 17,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
21
+ 18,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
22
+ 19,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
23
+ 20,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
24
+ 21,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
25
+ 22,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
26
+ 23,bone,512,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
27
+ 24,full,1280,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
28
+ 25,full,1280,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
29
+ 26,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
30
+ 27,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
31
+ 28,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
32
+ 29,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
33
+ 30,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
34
+ 31,full,1024,32,0.01,5.00E-05,1.00E-05,0.01,0.05,0.01,
35
+ head,full,512,32,0.01,0.00001,0.000001,0.01,0.05,0.01,
step-2-train-sft-x070.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ python train.py --load_model "myfolder/models/rwkv-x070-2b9-world-v3-40_trained-20250113-ctx4k.pth" \
2
+ --wandb "RWKV-LM-RLHF x070-2b9 General JPENCN v3" \
3
+ --proj_dir "myfolder/Outputs/x070GeneralJPENCNv3" \
4
+ --state 0 \
5
+ --infctx 0 \
6
+ --vocab_size 65536 --ctx_len 5120 \
7
+ --epoch_steps 2000 --epoch_count 200 --epoch_begin 0 --epoch_save 1 \
8
+ --micro_bsz 3 --n_layer 32 --n_embd 2560 \
9
+ --lr_init 1e-5 --lr_final 1e-6 \
10
+ --warmup_steps 100 --beta1 0.9 --beta2 0.999 --adam_eps 1e-8 \
11
+ --accelerator gpu --devices 2 --precision 'bf16' \
12
+ --grad_cp 1 --my_testing "x070" \
13
+ --strategy deepspeed_stage_2_offload \
14
+ --layer_profile 'layerprofile/32_TEST_bone_2b9_mytest.csv' \
15
+ --quant 0 \
16
+ --quant_mode 'nf4' \
17
+ --gpu_arch 'rocm' \
18
+ --limited_lora 0 \
19
+ --sft 1 \
20
+ --smoothing 0.001 \
21
+ --random_mode 1 \
22
+ --optim '' \
23
+ --train_data_file 'myfolder/datasets/General-jpencnv3.h5' \
24
+ --infctx_dataset_multiplier 8 \
25
+ --accumulate_grad_batches 16