{
"optimizer_state_offchip": true,
"replicated_tensor_sharding": true,
"enable_half_partials": true,
"gradient_accumulation_steps": 32,
"executable_cache_dir": "./exe_cache",
"layers_per_ipu": [1, 2, 3, 3, 3, 0, 6, 6],
"matmul_proportion": [0.6, 0.6, 0.2, 0.2, 0.2, 0.6, 0.2, 0.2],
"serialized_projection_splits_per_ipu": [0, 0, 0, 0, 0, 4, 0, 0]
}