{ "apply_qk_norm": true, "architectures": [ "OpenLMforCausalLM" ], "attn_func": null, "auto_map": { "AutoConfig": "configuration_openlm.OpenLMConfig", "AutoModel": "modeling_openlm.OpenLMModel", "AutoModelForCausalLM": "modeling_openlm.OpenLMforCausalLM" }, "dim": 2560, "ffn_type": "swiglu", "model_type": "openlm", "moe_capacity_factor": 1.25, "moe_expert_model_parallelism": false, "moe_freq": 0, "moe_loss_weight": 0.1, "moe_num_experts": null, "moe_top_k": 2, "moe_weight_parallelism": false, "n_heads": 32, "n_layers": 32, "norm_eps": 1e-05, "norm_type": null, "params": null, "params_args_dict": { "apply_qk_norm": true, "attn_activation": null, "attn_name": "auto", "attn_seq_scalar": null, "attn_seq_scalar_alpha": null, "dim": 2560, "ffn_type": "swiglu", "model": "open_lm_3b", "model_norm": "gain_only_lp_layer_norm", "moe_capacity_factor": 1.25, "moe_expert_model_parallelism": false, "moe_freq": 0, "moe_loss_weight": 0.1, "moe_num_experts": null, "moe_top_k": 2, "moe_weight_parallelism": false, "n_heads": 32, "n_layers": 32, "norm_eps": 1e-05, "positional_embedding_type": "rotary", "post_embed_norm": false, "qk_norm": true, "seq_len": 2048, "vocab_size": 50432, "weight_tying": false }, "positional_embedding_type": "rotary", "post_embed_norm": false, "seq_len": 2048, "tie_word_embeddings": false, "transformers_version": "4.40.0", "vocab_size": 50432, "weight_tying": false }