---
# Configuration for the `suchir-demo` run (group `climb`).
# Layout: one top-level section per concern (experiment, dataset, tokenizer,
# data preprocessing, model, trainer, curricula). Keys and values are
# unchanged; only the block structure has been restored.

# Run identity and bookkeeping.
experiment:
  seed: 42
  name: suchir-demo
  group: climb
  dry_run: false
  offline_run: false
  # No checkpoint / run id is set, so nothing is configured to be resumed.
  resume_checkpoint_path: null
  resume_run_id: null

# Dataset identifier and sub-configuration.
dataset:
  name: cambridge-climb/BabyLM
  subconfig: strict_small

tokenizer:
  name: cambridge-climb/CamBabyTokenizer-8192
  add_prefix_space: true

data_preprocessing:
  include_punctuation: true
  join_sentences: true
  max_input_length: 128
  callback_functions: null

model:
  name: roberta_pre_layer_norm
  model_kwargs:
    # NOTE(review): 8192 matches the suffix of tokenizer.name; presumably
    # these must stay in sync - confirm against the consumer.
    vocab_size: 8192
    num_hidden_layers: 8
    num_attention_heads: 8
    hidden_size: 256
    intermediate_size: 2048
    layer_norm_eps: 1.0e-05
    eos_token_id: 4
    bos_token_id: 3
    pad_token_id: 1
    tie_word_embeddings: false

trainer:
  batch_size: 32
  lr: 0.001
  # Warmup is configured for 100000 of the 400000 total training steps.
  num_warmup_steps: 100000
  max_training_steps: 400000
  # Evaluation toggles: only BLiMP and perplexity are enabled.
  eval_blimp: true
  eval_glue: false
  eval_msgs: false
  eval_perplexity: true

# A single objective unit (`mlm`) is defined.
objective_curriculum:
  units:
    mlm:
      task_head_params: {}
      optimizer_params:
        # NOTE(review): same value as trainer.lr; confirm which of the two
        # the consumer actually uses for this unit.
        lr: 0.001
      scheduler_params: {}
      optional_kwargs:
        mask_probability: 0.15
        unmask_probability: 0
  steps:
    # NOTE(review): presumably the [start, end] fraction of training during
    # which the `mlm` unit is active - confirm against the consumer.
    mlm:
      - 0.0
      - 1.0

# No data or vocabulary curriculum is configured.
data_curriculum: null
vocabulary_curriculum: null