# ylwt
# /

# PaddlePaddle-galactica-125m

# Model card Files Files and versions Community

# PaddlePaddle-galactica-125m / config.yaml

# ylwt's picture

# first commit

# 4d2400c 8 months ago

# history blame contribute delete

# 2.33 kB

	arch:
	type: TransformerLMHeadModel
	args:
	transformer_config:
	type: TransformerDecoderOnlyModel
	args:
	embed_config:
	type: OPTEmbeddingBlock
	args:
	token_embed_config:
	type: TokenEmbedding
	args:
	n_embed: 768
	n_vocab: 50000
	pos_embed_config:
	type: OPTLearnedPositionalEmbedding
	args:
	n_pos: 2048
	n_embed: 768
	offset: 2
	type_embed_config: null
	ln_config: null
	p_drop_embed: 0.1
	concat_strategy: id_first
	decoder_config:
	type: TransformerDecoderBlock
	args:
	attn_config:
	type: MultiHeadKeyValueAttention
	args:
	n_embed: 768
	n_pos: 2048
	n_head: 12
	head_size: 64
	p_drop_attn: 0.1
	p_drop_resid: 0.1
	bias_attn: true
	bias_proj: true
	cross_attn: false
	scale_dot_product: false
	scale_layer_wise: false
	layer_idx: null
	perform_linear_bias: false
	perform_bloom_split_head: false
	perform_query_scaling: true
	attn_window_size: null
	mlp_config:
	type: TransformerMLP
	args:
	n_embed: 768
	n_inner: 3072
	act_fn_config:
	type: NewGELUActivation
	args: {}
	p_drop_mlp: 0.1
	ln_config:
	type: LayerNorm
	args:
	n_embed: 768
	ln_eps: 1.0e-05
	n_embed: 768
	post_norm: false
	add_cross_attn: false
	n_embed: 768
	n_layer: 12
	n_head: 12
	ln_config:
	type: LayerNorm
	args:
	n_embed: 768
	ln_eps: 1.0e-05
	perform_linear_bias: false
	attn_window_size_loop_unit: null
	lm_head_config:
	type: TransformerLMHead
	args:
	n_vocab: 50000
	n_embed: 768
	perform_transform: false
	act_fn_config: null
	ln_config: null