|
wandb_version: 1 |
|
|
|
_wandb: |
|
desc: null |
|
value: |
|
cli_version: 0.10.33 |
|
framework: huggingface |
|
huggingface_version: 4.9.0.dev0 |
|
is_jupyter_run: false |
|
is_kaggle_kernel: false |
|
python_version: 3.8.10 |
|
t: |
|
1: |
|
- 3 |
|
- 11 |
|
2: |
|
- 3 |
|
- 11 |
|
4: 3.8.10 |
|
5: 0.10.33 |
|
6: 4.9.0.dev0 |
|
8: |
|
- 5 |
|
adafactor: |
|
desc: null |
|
value: false |
|
adam_beta1: |
|
desc: null |
|
value: 0.9 |
|
adam_beta2: |
|
desc: null |
|
value: 0.98 |
|
adam_epsilon: |
|
desc: null |
|
value: 1.0e-08 |
|
cache_dir: |
|
desc: null |
|
value: null |
|
config_name: |
|
desc: null |
|
value: ./ |
|
dataloader_drop_last: |
|
desc: null |
|
value: false |
|
dataloader_num_workers: |
|
desc: null |
|
value: 0 |
|
dataloader_pin_memory: |
|
desc: null |
|
value: true |
|
dataset_config_name: |
|
desc: null |
|
value: null |
|
dataset_name: |
|
desc: null |
|
value: null |
|
ddp_find_unused_parameters: |
|
desc: null |
|
value: null |
|
debug: |
|
desc: null |
|
value: [] |
|
deepspeed: |
|
desc: null |
|
value: null |
|
disable_tqdm: |
|
desc: null |
|
value: false |
|
do_eval: |
|
desc: null |
|
value: false |
|
do_predict: |
|
desc: null |
|
value: false |
|
do_train: |
|
desc: null |
|
value: false |
|
dtype: |
|
desc: null |
|
value: float32 |
|
eval_accumulation_steps: |
|
desc: null |
|
value: null |
|
eval_steps: |
|
desc: null |
|
value: 20000 |
|
evaluation_strategy: |
|
desc: null |
|
value: IntervalStrategy.NO |
|
fp16: |
|
desc: null |
|
value: false |
|
fp16_backend: |
|
desc: null |
|
value: auto |
|
fp16_full_eval: |
|
desc: null |
|
value: false |
|
fp16_opt_level: |
|
desc: null |
|
value: O1 |
|
gradient_accumulation_steps: |
|
desc: null |
|
value: 4 |
|
greater_is_better: |
|
desc: null |
|
value: null |
|
group_by_length: |
|
desc: null |
|
value: false |
|
ignore_data_skip: |
|
desc: null |
|
value: false |
|
label_names: |
|
desc: null |
|
value: null |
|
label_smoothing_factor: |
|
desc: null |
|
value: 0.0 |
|
learning_rate: |
|
desc: null |
|
value: 5.0e-05 |
|
length_column_name: |
|
desc: null |
|
value: length |
|
line_by_line: |
|
desc: null |
|
value: false |
|
load_best_model_at_end: |
|
desc: null |
|
value: false |
|
local_rank: |
|
desc: null |
|
value: -1 |
|
log_level: |
|
desc: null |
|
value: -1 |
|
log_level_replica: |
|
desc: null |
|
value: -1 |
|
log_on_each_node: |
|
desc: null |
|
value: true |
|
logging_dir: |
|
desc: null |
|
value: ./runs/Jul14_22-29-13_t1v-n-f5c06ea1-w-0 |
|
logging_first_step: |
|
desc: null |
|
value: false |
|
logging_steps: |
|
desc: null |
|
value: 500 |
|
logging_strategy: |
|
desc: null |
|
value: IntervalStrategy.STEPS |
|
lr_scheduler_type: |
|
desc: null |
|
value: SchedulerType.LINEAR |
|
max_eval_samples: |
|
desc: null |
|
value: 2000 |
|
max_grad_norm: |
|
desc: null |
|
value: 1.0 |
|
max_seq_length: |
|
desc: null |
|
value: 4096 |
|
max_steps: |
|
desc: null |
|
value: -1 |
|
metric_for_best_model: |
|
desc: null |
|
value: null |
|
mlm_probability: |
|
desc: null |
|
value: 0.15 |
|
model_name_or_path: |
|
desc: null |
|
value: null |
|
model_type: |
|
desc: null |
|
value: big_bird |
|
mp_parameters: |
|
desc: null |
|
value: '' |
|
no_cuda: |
|
desc: null |
|
value: false |
|
num_train_epochs: |
|
desc: null |
|
value: 5.0 |
|
output_dir: |
|
desc: null |
|
value: ./ |
|
overwrite_cache: |
|
desc: null |
|
value: false |
|
overwrite_output_dir: |
|
desc: null |
|
value: true |
|
pad_to_max_length: |
|
desc: null |
|
value: false |
|
past_index: |
|
desc: null |
|
value: -1 |
|
per_device_eval_batch_size: |
|
desc: null |
|
value: 2 |
|
per_device_train_batch_size: |
|
desc: null |
|
value: 2 |
|
per_gpu_eval_batch_size: |
|
desc: null |
|
value: null |
|
per_gpu_train_batch_size: |
|
desc: null |
|
value: null |
|
prediction_loss_only: |
|
desc: null |
|
value: false |
|
preprocessing_num_workers: |
|
desc: null |
|
value: 96 |
|
push_to_hub: |
|
desc: null |
|
value: true |
|
push_to_hub_model_id: |
|
desc: null |
|
value: '' |
|
push_to_hub_organization: |
|
desc: null |
|
value: null |
|
push_to_hub_token: |
|
desc: null |
|
value: null |
|
remove_unused_columns: |
|
desc: null |
|
value: true |
|
report_to: |
|
desc: null |
|
value: |
|
- tensorboard |
|
- wandb |
|
resume_from_checkpoint: |
|
desc: null |
|
value: null |
|
run_name: |
|
desc: null |
|
value: ./ |
|
save_on_each_node: |
|
desc: null |
|
value: false |
|
save_optimizer: |
|
desc: null |
|
value: true |
|
save_steps: |
|
desc: null |
|
value: 20000 |
|
save_strategy: |
|
desc: null |
|
value: IntervalStrategy.STEPS |
|
save_total_limit: |
|
desc: null |
|
value: 5 |
|
seed: |
|
desc: null |
|
value: 42 |
|
sharded_ddp: |
|
desc: null |
|
value: [] |
|
skip_memory_metrics: |
|
desc: null |
|
value: true |
|
tokenizer_name: |
|
desc: null |
|
value: ./ |
|
tpu_metrics_debug: |
|
desc: null |
|
value: false |
|
tpu_num_cores: |
|
desc: null |
|
value: null |
|
train_ref_file: |
|
desc: null |
|
value: null |
|
use_fast_tokenizer: |
|
desc: null |
|
value: true |
|
use_legacy_prediction_loop: |
|
desc: null |
|
value: false |
|
validation_ref_file: |
|
desc: null |
|
value: null |
|
validation_split_percentage: |
|
desc: null |
|
value: 5 |
|
warmup_ratio: |
|
desc: null |
|
value: 0.0 |
|
warmup_steps: |
|
desc: null |
|
value: 5000 |
|
weight_decay: |
|
desc: null |
|
value: 0.0095 |
|
|