---
# Training configuration for a BabyLM-style run (CLIMB setup).
# NOTE(review): indentation below was reconstructed from key order —
# the extracted source had lost all nesting; confirm against the
# original config in the training repository.

experiment:
  seed: 42
  name: suchir-demo
  group: climb
  dry_run: false
  offline_run: false
  resume_checkpoint_path: null
  resume_run_id: null

dataset:
  name: cambridge-climb/BabyLM
  subconfig: strict_small

tokenizer:
  name: cambridge-climb/CamBabyTokenizer-8192
  add_prefix_space: true

data_preprocessing:
  include_punctuation: true
  join_sentences: true
  max_input_length: 128
  callback_functions: null

model:
  name: roberta_pre_layer_norm
  # Keyword arguments forwarded to the model constructor.
  model_kwargs:
    vocab_size: 8192
    num_hidden_layers: 8
    num_attention_heads: 8
    hidden_size: 256
    intermediate_size: 2048
    layer_norm_eps: 1.0e-05
    eos_token_id: 4
    bos_token_id: 3
    pad_token_id: 1
    tie_word_embeddings: false

trainer:
  batch_size: 32
  lr: 0.001
  num_warmup_steps: 100000
  max_training_steps: 400000
  eval_blimp: true
  eval_glue: false
  eval_msgs: false
  eval_perplexity: true

objective_curriculum:
  # Per-objective settings; only masked language modelling is active.
  units:
    mlm:
      task_head_params: {}
      optimizer_params:
        lr: 0.001
      scheduler_params: {}
      optional_kwargs:
        mask_probability: 0.15
        unmask_probability: 0
  # Activation window for each objective as fractions of training
  # (MLM runs from step 0.0 to 1.0, i.e. the whole run).
  steps:
    mlm:
      - 0.0
      - 1.0

data_curriculum: null

vocabulary_curriculum: null