{
  "train_micro_batch_size_per_gpu": 2,
  "gradient_accumulation_steps": 1,
  "steps_per_print": 100,
  "gradient_clipping": 1.0,
  "fp16": {
    "enabled": true,
    "loss_scale": 0,
    "loss_scale_window": 2000,
    "hysteresis": 2,
    "min_loss_scale": 0.0
  },
  "zero_optimization": {
    "stage": 2,
    "reduce_bucket_size": 50000000,
    "overlap_comm": true
  },
  "sparse_attention": {
    "mode": "fixed",
    "block": 16,
    "different_layout_per_head": true,
    "num_local_blocks": 8,
    "num_global_blocks": 1,
    "attention": "unidirectional",
    "horizontal_global_attention": false,
    "num_different_global_patterns": 8
  }
}