diff --git "a/run_stream.512.log" "b/run_stream.512.log" new file mode 100644--- /dev/null +++ "b/run_stream.512.log" @@ -0,0 +1,219 @@ +2021-07-26 00:12:24.148625: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory +[00:12:25] - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False +[00:12:25] - INFO - __main__ - Training/evaluation parameters TrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.98, +adam_epsilon=1e-06, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=False, +do_predict=False, +do_train=False, +eval_accumulation_steps=None, +eval_steps=1000, +evaluation_strategy=IntervalStrategy.NO, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=1, +greater_is_better=None, +group_by_length=False, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0006, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./outputs/runs/Jul26_00-12-25_tablespoon, +logging_first_step=False, +logging_steps=500, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=3.0, +output_dir=./outputs, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=48, +per_device_train_batch_size=48, +prediction_loss_only=False, +push_to_hub=False, +push_to_hub_model_id=outputs, +push_to_hub_organization=None, +push_to_hub_token=None, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=./outputs, +save_on_each_node=False, +save_steps=1000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=5, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=500, +weight_decay=0.01, +) +[00:12:26] - WARNING - __main__ - Unable to load local dataset with perplexity sampling support. Using huggingface.co/datasets/bertin-project/mc4-es-sampled: BuilderConfig Mc4EsSampledConfig(name='stepwise', version='0.0.0', data_dir=None, data_files={}, description=None) doesn't have a 'boundaries' key. +[00:12:27] - INFO - absl - Starting the local TPU driver. +[00:12:27] - INFO - absl - Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local:// +[00:12:27] - INFO - absl - Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: TPU Interpreter Host +wandb: Currently logged in as: versae (use `wandb login --relogin` to force relogin) +wandb: wandb version 0.11.0 is available! To upgrade, please run: +wandb: $ pip install wandb --upgrade +2021-07-26 00:12:34.440423: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory +wandb: Tracking run with wandb version 0.10.33 +wandb: Syncing run amber-galaxy-46 +wandb: View project at https://wandb.ai/wandb/hf-flax-bertin-roberta-es +wandb: View run at https://wandb.ai/wandb/hf-flax-bertin-roberta-es/runs/17u6inbn +wandb: Run data is saved locally in /var/hf/experiment-base-exp-512seq-stepwise/wandb/run-20210726_001233-17u6inbn +wandb: Run `wandb offline` to turn off syncing. +2021-07-26 00:12:35.575266: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2021-07-26 00:12:35.575304: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303) +[00:12:36] - INFO - filelock - Lock 139656499698272 acquired on /home/versae/.cache/huggingface/transformers/27b7e968d2908b27f8c1df265c2dc08aef61be0f25bdc735df4df552829968fd.04a8293889c44bb7f31a5ee6212b8aa0b690121444e9c7ce1616fbe2a461ebba.lock + + Downloading: 0%| | 0.00/250M [00:00