# Model configuration mapping.
# NOTE(review): the `#` comments and the trailing comma mean this file is not
# strict JSON; it is presumably read by a YAML loader (flow-mapping syntax) --
# confirm against the consumer before re-serializing or renaming this file.
# Keep one entry per line: a `#` comment runs to the end of its physical line,
# so placing it on the same line as other keys silently drops those keys.
{
  "pipe_parallel_size": 1,
  "model_parallel_size": 8,
  "make_vocab_size_divisible_by": 1,

  # model settings
  "num_layers": 80,
  "hidden_size": 8192,
  "num_attention_heads": 64,
  "seq_length": 2048,
  "max_position_embeddings": 2048,
  "pos_emb": "rotary",
  "rotary_pct": 1,
  "no_weight_tying": true,
  "gpt_j_residual": false,
  "output_layer_parallelism": "column",
  "norm": "rmsnorm",
  "rms_norm_epsilon": 1.0e-6,

  "scaled_upper_triang_masked_softmax_fusion": true,
  "bias_gelu_fusion": false,
  "use_bias_in_norms": false,
  "use_bias_in_attn_linear": false,
  "activation": "swiglu",
  "mlp_multiple_of": 256,
}