---
# Training configuration for a BabyLM-style run (CLIMB setup).
# NOTE(review): indentation below was reconstructed from key order —
# the extracted source had lost all nesting; confirm against the
# original config in the training repository.

experiment:
  seed: 42
  name: suchir-demo
  group: climb
  dry_run: false
  offline_run: false
  resume_checkpoint_path: null
  resume_run_id: null

dataset:
  name: cambridge-climb/BabyLM
  subconfig: strict_small

tokenizer:
  name: cambridge-climb/CamBabyTokenizer-8192
  add_prefix_space: true

data_preprocessing:
  include_punctuation: true
  join_sentences: true
  max_input_length: 128
  callback_functions: null

model:
  name: roberta_pre_layer_norm
  # Keyword arguments forwarded to the model constructor.
  model_kwargs:
    vocab_size: 8192
    num_hidden_layers: 8
    num_attention_heads: 8
    hidden_size: 256
    intermediate_size: 2048
    layer_norm_eps: 1.0e-05
    eos_token_id: 4
    bos_token_id: 3
    pad_token_id: 1
    tie_word_embeddings: false

trainer:
  batch_size: 32
  lr: 0.001
  num_warmup_steps: 100000
  max_training_steps: 400000
  eval_blimp: true
  eval_glue: false
  eval_msgs: false
  eval_perplexity: true

objective_curriculum:
  # Per-objective settings; only masked language modelling is active.
  units:
    mlm:
      task_head_params: {}
      optimizer_params:
        lr: 0.001
      scheduler_params: {}
      optional_kwargs:
        mask_probability: 0.15
        unmask_probability: 0
  # Activation window for each objective as fractions of training
  # (MLM runs from step 0.0 to 1.0, i.e. the whole run).
  steps:
    mlm:
      - 0.0
      - 1.0

data_curriculum: null

vocabulary_curriculum: null