Haoxiang-Wang
committed on
Commit
•
476a205
1
Parent(s):
3e3229e
Remove learning rate scheduler in deepspeed config to avoid conflict (#909)
Browse files
- deepspeed/zero1.json +0 -10
- deepspeed/zero2.json +0 -10
- deepspeed/zero3.json +0 -10
deepspeed/zero1.json
CHANGED
@@ -24,16 +24,6 @@
|
|
24 |
"weight_decay": "auto"
|
25 |
}
|
26 |
},
|
27 |
-
"scheduler": {
|
28 |
-
"type": "WarmupDecayLR",
|
29 |
-
"params": {
|
30 |
-
"warmup_min_lr": "auto",
|
31 |
-
"warmup_max_lr": "auto",
|
32 |
-
"warmup_num_steps": "auto",
|
33 |
-
"warmup_type": "linear",
|
34 |
-
"total_num_steps": "auto"
|
35 |
-
}
|
36 |
-
},
|
37 |
"gradient_accumulation_steps": "auto",
|
38 |
"train_batch_size": "auto",
|
39 |
"train_micro_batch_size_per_gpu": "auto",
|
|
|
24 |
"weight_decay": "auto"
|
25 |
}
|
26 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
"gradient_accumulation_steps": "auto",
|
28 |
"train_batch_size": "auto",
|
29 |
"train_micro_batch_size_per_gpu": "auto",
|
deepspeed/zero2.json
CHANGED
@@ -28,16 +28,6 @@
|
|
28 |
"weight_decay": "auto"
|
29 |
}
|
30 |
},
|
31 |
-
"scheduler": {
|
32 |
-
"type": "WarmupDecayLR",
|
33 |
-
"params": {
|
34 |
-
"warmup_min_lr": "auto",
|
35 |
-
"warmup_max_lr": "auto",
|
36 |
-
"warmup_num_steps": "auto",
|
37 |
-
"warmup_type": "linear",
|
38 |
-
"total_num_steps": "auto"
|
39 |
-
}
|
40 |
-
},
|
41 |
"gradient_accumulation_steps": "auto",
|
42 |
"train_batch_size": "auto",
|
43 |
"train_micro_batch_size_per_gpu": "auto",
|
|
|
28 |
"weight_decay": "auto"
|
29 |
}
|
30 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
"gradient_accumulation_steps": "auto",
|
32 |
"train_batch_size": "auto",
|
33 |
"train_micro_batch_size_per_gpu": "auto",
|
deepspeed/zero3.json
CHANGED
@@ -32,16 +32,6 @@
|
|
32 |
"weight_decay": "auto"
|
33 |
}
|
34 |
},
|
35 |
-
"scheduler": {
|
36 |
-
"type": "WarmupDecayLR",
|
37 |
-
"params": {
|
38 |
-
"warmup_min_lr": "auto",
|
39 |
-
"warmup_max_lr": "auto",
|
40 |
-
"warmup_num_steps": "auto",
|
41 |
-
"warmup_type": "linear",
|
42 |
-
"total_num_steps": "auto"
|
43 |
-
}
|
44 |
-
},
|
45 |
"gradient_accumulation_steps": "auto",
|
46 |
"train_batch_size": "auto",
|
47 |
"train_micro_batch_size_per_gpu": "auto",
|
|
|
32 |
"weight_decay": "auto"
|
33 |
}
|
34 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
"gradient_accumulation_steps": "auto",
|
36 |
"train_batch_size": "auto",
|
37 |
"train_micro_batch_size_per_gpu": "auto",
|