# File: training/flax/distillation_scripts/run_bs_sweep.yaml
# Uploaded by nullonesix in commit a1be16b (verified): "Saving train state of step 1"
# Size: 1.33 kB
# Sweep configuration: grid search over per-device batch size and precision
# for run_distillation.py. The key layout (command / method / metric /
# parameters / program / project) appears to follow the Weights & Biases
# sweep schema -- confirm against the tool that consumes this file.

# Command line assembled for every run. ${program} and ${args} are
# placeholders filled in by the sweep runner; the flags between them are
# passed to every run unchanged.
command:
  - python3
  - ${program}
  - --do_train
  - --use_scan
  - --gradient_checkpointing
  - --overwrite_output_dir
  - --predict_with_generate
  - --freeze_encoder
  - --streaming
  - --use_auth_token
  - --compilation_cache
  - ${args}

# Exhaustive grid: 3 batch sizes x 2 precision modes = 6 combinations.
method: grid

# Objective reported for each run.
metric:
  goal: minimize
  name: train/loss

parameters:
  # --- Models ---
  model_name_or_path:
    value: distil-whisper/large-32-2
  teacher_model_name_or_path:
    value: openai/whisper-large-v2

  # --- Training data ---
  train_dataset_name:
    value: librispeech_asr
  train_dataset_config_name:
    value: all
  train_split_name:
    value: train.other.500
  train_dataset_samples:
    value: 100

  # --- Paths ---
  cache_dir:
    value: /fsx/sanchitgandhi/cache
  dataset_cache_dir:
    value: /fsx/sanchitgandhi/cache
  output_dir:
    value: ./

  # --- Swept parameters (the only entries with more than one value) ---
  per_device_train_batch_size:
    values:
      - 128
      - 256
      - 512
  precision:
    values:
      - "full_mixed"
      - "half_mixed"

  # --- Fixed training settings ---
  dtype:
    value: bfloat16
  do_eval:
    value: false
  learning_rate:
    # Written with an explicit decimal point so that YAML 1.1 loaders (e.g.
    # PyYAML) resolve it as a float; a bare `3e-4` is read as a string there.
    value: 3.0e-4
  lr_scheduler_type:
    value: constant_with_warmup
  warmup_steps:
    value: 30
  max_steps:
    value: 30
  save_steps:
    # don't save checkpoints during sweep (51 exceeds max_steps of 30)
    value: 51
  dataloader_num_workers:
    value: 48
  logging_steps:
    value: 5
  wer_threshold:
    value: 100

# Script substituted for ${program}, and the project the sweep is filed under.
program: run_distillation.py
project: distil-whisper-sweeps