# climb-roberta_pre_layer_norm-model / hydra_config_1713758125.4750285.yaml
# suchirsalhan's picture
# Upload 12 files
# caf3ce7 verified
# Experiment identity and run-control settings.
experiment:
  # Random seed for the run.
  seed: 42
  # Run name and the group it is filed under.
  name: suchir-demo
  group: climb
  dry_run: false
  offline_run: false
  # Both resume fields are null, i.e. no checkpoint or prior run id is given.
  resume_checkpoint_path: null
  resume_run_id: null
# Dataset identifier and the sub-configuration selected from it.
dataset:
  name: cambridge-climb/BabyLM
  subconfig: strict_small
# Tokenizer identifier and its prefix-space option.
tokenizer:
  # NOTE(review): the "-8192" suffix presumably denotes the vocabulary
  # size; confirm it agrees with the model's vocab_size.
  name: cambridge-climb/CamBabyTokenizer-8192
  add_prefix_space: true
# Options applied to the data before it reaches the model.
data_preprocessing:
  include_punctuation: true
  join_sentences: true
  # Presumably measured in tokens; confirm against the preprocessing code.
  max_input_length: 128
  # NOTE(review): placed in this section because it sits between
  # max_input_length and the model section; confirm against the schema.
  callback_functions: null
# Model selection plus the keyword arguments used to configure it.
model:
  name: roberta_pre_layer_norm
  # NOTE(review): every key up to the trainer section is assumed to belong
  # to model_kwargs; confirm against the config schema.
  model_kwargs:
    vocab_size: 8192
    num_hidden_layers: 8
    num_attention_heads: 8
    # 256 / 8 heads = 32 dimensions per attention head.
    hidden_size: 256
    intermediate_size: 2048
    layer_norm_eps: 1.0e-05
    # Special-token ids; presumably must match the tokenizer's vocabulary.
    eos_token_id: 4
    bos_token_id: 3
    pad_token_id: 1
    tie_word_embeddings: false
# Training-loop hyperparameters and evaluation toggles.
trainer:
  batch_size: 32
  lr: 0.001
  # Warmup spans 100000 of the 400000 total steps (25%).
  num_warmup_steps: 100000
  max_training_steps: 400000
  # Evaluation switches: BLiMP and perplexity on, GLUE and MSGS off.
  eval_blimp: true
  eval_glue: false
  eval_msgs: false
  eval_perplexity: true
# Training objectives ("units") and the schedule ("steps") for each one.
# Only a single unit, mlm, is defined here.
objective_curriculum:
  units:
    mlm:
      task_head_params: {}
      optimizer_params:
        lr: 0.001
      scheduler_params: {}
      optional_kwargs:
        mask_probability: 0.15
        unmask_probability: 0
  steps:
    # NOTE(review): presumably the [start, end] fraction of training during
    # which the unit is active; confirm against the curriculum code.
    mlm:
      - 0.0
      - 1.0
# Both of these curriculum settings are explicitly null in this config.
data_curriculum: null
vocabulary_curriculum: null