diff --git a/.gitattributes b/.gitattributes deleted file mode 100644 index be0424d9c1da1d5ebed05a465b6bf7e422b1fa72..0000000000000000000000000000000000000000 --- a/.gitattributes +++ /dev/null @@ -1,41 +0,0 @@ -*.7z filter=lfs diff=lfs merge=lfs -text -*.arrow filter=lfs diff=lfs merge=lfs -text -*.bin filter=lfs diff=lfs merge=lfs -text -*.bz2 filter=lfs diff=lfs merge=lfs -text -*.ftz filter=lfs diff=lfs merge=lfs -text -*.gz filter=lfs diff=lfs merge=lfs -text -*.h5 filter=lfs diff=lfs merge=lfs -text -*.joblib filter=lfs diff=lfs merge=lfs -text -*.lfs.* filter=lfs diff=lfs merge=lfs -text -*.mlmodel filter=lfs diff=lfs merge=lfs -text -*.model filter=lfs diff=lfs merge=lfs -text -*.msgpack filter=lfs diff=lfs merge=lfs -text -*.npy filter=lfs diff=lfs merge=lfs -text -*.npz filter=lfs diff=lfs merge=lfs -text -*.onnx filter=lfs diff=lfs merge=lfs -text -*.ot filter=lfs diff=lfs merge=lfs -text -*.parquet filter=lfs diff=lfs merge=lfs -text -*.pb filter=lfs diff=lfs merge=lfs -text -*.pickle filter=lfs diff=lfs merge=lfs -text -*.pkl filter=lfs diff=lfs merge=lfs -text -*.pt filter=lfs diff=lfs merge=lfs -text -*.pth filter=lfs diff=lfs merge=lfs -text -*.rar filter=lfs diff=lfs merge=lfs -text -saved_model/**/* filter=lfs diff=lfs merge=lfs -text -*.tar.* filter=lfs diff=lfs merge=lfs -text -*.tflite filter=lfs diff=lfs merge=lfs -text -*.tgz filter=lfs diff=lfs merge=lfs -text -*.wasm filter=lfs diff=lfs merge=lfs -text -*.xz filter=lfs diff=lfs merge=lfs -text -*.zip filter=lfs diff=lfs merge=lfs -text -*.zst filter=lfs diff=lfs merge=lfs -text -*tfevents* filter=lfs diff=lfs merge=lfs -text -checkpoint-10000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-12500/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-15000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-20000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-25000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-5000/tokenizer.json filter=lfs diff=lfs merge=lfs -text -checkpoint-7500/tokenizer.json filter=lfs diff=lfs merge=lfs -text -tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore deleted file mode 100644 index d6a0667dae0c5dc6b4271cddafa30d834e8bab98..0000000000000000000000000000000000000000 --- a/.gitignore +++ /dev/null @@ -1,11 +0,0 @@ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ -checkpoint-*/ -*/pilot_*/ -pilot_*/ diff --git a/all_results.json b/all_results.json deleted file mode 100644 index 5b9e0c7a79a54d7598317da801435e09f7c8e56a..0000000000000000000000000000000000000000 --- a/all_results.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "epoch": 181.16, - "eval_loss": 3.5487985610961914, - "eval_runtime": 512.7811, - "eval_samples": 4906, - "eval_samples_per_second": 9.567, - "eval_steps_per_second": 4.784, - "perplexity": 34.7715165292787, - "train_loss": 3.6800425, - "train_runtime": 52880.0792, - "train_samples": 1000, - "train_samples_per_second": 3.782, - "train_steps_per_second": 0.473 -} \ No newline at end of file diff --git a/checkpoint-10000/config.json b/checkpoint-10000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-10000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": 
{}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-10000/optimizer.pt b/checkpoint-10000/optimizer.pt deleted file mode 100644 index 241f1a34efa3b2c8a1168fc9ba7f50edade0ae64..0000000000000000000000000000000000000000 --- a/checkpoint-10000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89473fe4c6043f80ef69c738a10ca98562441638cc321179f695bacbddb29e11 -size 2254269 diff --git a/checkpoint-10000/pytorch_model.bin b/checkpoint-10000/pytorch_model.bin deleted file mode 100644 index 0faf1493783515762de6827c93a1c804c613aded..0000000000000000000000000000000000000000 --- a/checkpoint-10000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ec80f2e8e5cdaee9d52b1e98cb6a801d4d43b3172dc317a3a52187d615a9f2f7 -size 2236955191 diff --git a/checkpoint-10000/rng_state.pth b/checkpoint-10000/rng_state.pth deleted file mode 100644 index 918790ffc8bf32efffd0a21f677a891d7afcca9b..0000000000000000000000000000000000000000 --- a/checkpoint-10000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d74e1315fe06c1a1154b37c656f2f90c1656206e1027b9f3bd60f7fc9d8f41f5 -size 14503 diff --git a/checkpoint-10000/scheduler.pt b/checkpoint-10000/scheduler.pt deleted file mode 100644 index 0278cf8d1585ae197bad514a213741000bcf63c6..0000000000000000000000000000000000000000 --- a/checkpoint-10000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ebae5cf74f470a9dc57b090feb9de29d57aa2d381061d1a61fd32b3c3221556b -size 623 diff --git a/checkpoint-10000/special_tokens_map.json b/checkpoint-10000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-10000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-10000/tokenizer.json b/checkpoint-10000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-10000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-10000/tokenizer_config.json b/checkpoint-10000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-10000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": 
"bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-10000/trainer_state.json b/checkpoint-10000/trainer_state.json deleted file mode 100644 index a232020e30b3897c92290163b6d9ca8d68e332dd..0000000000000000000000000000000000000000 --- a/checkpoint-10000/trainer_state.json +++ /dev/null @@ -1,56 +0,0 @@ -{ - "best_metric": 3.6707117557525635, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-10000", - "epoch": 72.46126126126126, - "global_step": 10000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 18.12, - "learning_rate": 9e-05, - "loss": 4.2544, - "step": 2500 - }, - { - "epoch": 36.23, - "learning_rate": 8e-05, - "loss": 3.9123, - "step": 5000 - }, - { - "epoch": 36.23, - "eval_loss": 3.843057632446289, - "eval_runtime": 513.4541, - "eval_samples_per_second": 9.555, - "eval_steps_per_second": 4.777, - "step": 5000 - }, - { - "epoch": 54.35, - "learning_rate": 7e-05, - "loss": 3.7584, - "step": 7500 - }, - { - "epoch": 72.46, - "learning_rate": 6e-05, - "loss": 3.6674, - "step": 10000 - }, - { - "epoch": 72.46, - "eval_loss": 3.6707117557525635, - "eval_runtime": 512.9035, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 10000 - } - ], - "max_steps": 25000, - "num_train_epochs": 182, - "total_flos": 1.4939552059333018e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-10000/training_args.bin b/checkpoint-10000/training_args.bin deleted file mode 100644 index d1628de4c6c12d20f7aa9b4f9eeef9664f72f107..0000000000000000000000000000000000000000 --- a/checkpoint-10000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:511eb83bbd6787b86308ce4eb4e32d6cc27314d329c212c555d84efc6ad6cd39 -size 3375 diff --git a/checkpoint-10000/wikiann-az-results.txt b/checkpoint-10000/wikiann-az-results.txt deleted file mode 100644 index 58e16d8bf317afd6d3fe9a705ae155b61ed3e81c..0000000000000000000000000000000000000000 --- a/checkpoint-10000/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-10000 -[0.37780898876404495, 0.36464857341684065, 0.3656664340544313, 0.3886156008432889, 0.37621023513139695, 0.39790209790209796, 0.3835616438356164, 0.373989218328841, 0.37212495708891175, 0.3786238211666085] -37.79 -0.96 -================================================== \ No newline at end of file diff --git a/checkpoint-12500/config.json b/checkpoint-12500/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-12500/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, 
- "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-12500/optimizer.pt b/checkpoint-12500/optimizer.pt deleted file mode 100644 index d926eddb615e438bc3786376057b8788f9fa4206..0000000000000000000000000000000000000000 --- a/checkpoint-12500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5970c77e08dd285ccd94a1f5c55d69bd605a422b5d2efbcd7428767e3da92241 -size 2254269 diff --git a/checkpoint-12500/pytorch_model.bin b/checkpoint-12500/pytorch_model.bin deleted file mode 100644 index e07c066d6f56ccba83e1205acf7199ab86198c32..0000000000000000000000000000000000000000 --- a/checkpoint-12500/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1367d8f3ff2e6e52e1466116315f08c450ef61a17b91ecb83cdcb38a204d599c -size 2236955191 diff --git a/checkpoint-12500/rng_state.pth b/checkpoint-12500/rng_state.pth deleted file mode 100644 index 5d7243e522139802546c462917414b993421d1c2..0000000000000000000000000000000000000000 --- a/checkpoint-12500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e78992ee794b94f2fbf6fa6e01edca246c41508ad3de33acf1134d85e32c9873 -size 14503 diff --git a/checkpoint-12500/scheduler.pt b/checkpoint-12500/scheduler.pt deleted file mode 100644 index 20fe61e31c8ad93792e2966e7a5aadf8fdfeb769..0000000000000000000000000000000000000000 --- a/checkpoint-12500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d270c6e3000cbdb534f7db7e774ca17393c2523690c8058754d752dd5b11a93a -size 623 diff --git a/checkpoint-12500/special_tokens_map.json b/checkpoint-12500/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-12500/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-12500/tokenizer.json b/checkpoint-12500/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-12500/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-12500/tokenizer_config.json b/checkpoint-12500/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-12500/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-12500/trainer_state.json b/checkpoint-12500/trainer_state.json deleted file mode 100644 index 
38ab888397b3f3ca72f34a4f1cdaa6631967ccc2..0000000000000000000000000000000000000000 --- a/checkpoint-12500/trainer_state.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "best_metric": 3.2581348419189453, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-12500", - "epoch": 90.57657657657657, - "global_step": 12500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 18.12, - "learning_rate": 0.0008, - "loss": 3.676, - "step": 2500 - }, - { - "epoch": 18.12, - "eval_loss": 3.457474708557129, - "eval_runtime": 529.7099, - "eval_samples_per_second": 9.262, - "eval_steps_per_second": 4.631, - "step": 2500 - }, - { - "epoch": 36.23, - "learning_rate": 0.0006, - "loss": 3.3098, - "step": 5000 - }, - { - "epoch": 36.23, - "eval_loss": 3.3395490646362305, - "eval_runtime": 529.196, - "eval_samples_per_second": 9.271, - "eval_steps_per_second": 4.635, - "step": 5000 - }, - { - "epoch": 54.35, - "learning_rate": 0.0004, - "loss": 3.2132, - "step": 7500 - }, - { - "epoch": 54.35, - "eval_loss": 3.292879581451416, - "eval_runtime": 528.2545, - "eval_samples_per_second": 9.287, - "eval_steps_per_second": 4.644, - "step": 7500 - }, - { - "epoch": 72.46, - "learning_rate": 0.0002, - "loss": 3.1654, - "step": 10000 - }, - { - "epoch": 72.46, - "eval_loss": 3.267232894897461, - "eval_runtime": 529.038, - "eval_samples_per_second": 9.273, - "eval_steps_per_second": 4.637, - "step": 10000 - }, - { - "epoch": 90.58, - "learning_rate": 0.0, - "loss": 3.1379, - "step": 12500 - }, - { - "epoch": 90.58, - "eval_loss": 3.2581348419189453, - "eval_runtime": 529.7541, - "eval_samples_per_second": 9.261, - "eval_steps_per_second": 4.63, - "step": 12500 - } - ], - "max_steps": 12500, - "num_train_epochs": 91, - "total_flos": 1.8674440074166272e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-12500/training_args.bin b/checkpoint-12500/training_args.bin deleted file mode 100644 index 8f9ccdb9ceae148427cb8811e8694d3ae87bf6a0..0000000000000000000000000000000000000000 --- a/checkpoint-12500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d03d7122884f8034582a568ae0aeb2a2c444d81b24122c204b152bc39ad0914e -size 3375 diff --git a/checkpoint-12500/wikiann-az-results.txt b/checkpoint-12500/wikiann-az-results.txt deleted file mode 100644 index 568fec400d667adef02541913ac49cc1a98ac611..0000000000000000000000000000000000000000 --- a/checkpoint-12500/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-12500 -[0.37828371278458844, 0.3636993418773814, 0.3639538945162417, 0.37745098039215685, 0.3763066202090592, 0.39721739130434774, 0.37895460797799174, 0.36977152899824256, 0.37243589743589745, 0.3800557880055788] -37.58 -0.91 -================================================== \ No newline at end of file diff --git a/checkpoint-15000/config.json b/checkpoint-15000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-15000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - 
"config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-15000/optimizer.pt b/checkpoint-15000/optimizer.pt deleted file mode 100644 index 2fe9822e5dc304b09fa59a2c0239ba2899a88be5..0000000000000000000000000000000000000000 --- a/checkpoint-15000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f50dd22578e231ca17b8e970b4471e3363c63b3069f7f3518bad48e7985f7c96 -size 2254269 diff --git a/checkpoint-15000/pytorch_model.bin b/checkpoint-15000/pytorch_model.bin deleted file mode 100644 index bdea0249cb74a4fac1f8e35457123a4a88d17795..0000000000000000000000000000000000000000 --- a/checkpoint-15000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ef567c0e28274195ac5c23b0bb43b4c48cefb697c0186129f0a2d689c77939a9 -size 2236955191 diff --git a/checkpoint-15000/rng_state.pth b/checkpoint-15000/rng_state.pth deleted file mode 100644 index 39bb50654458460978b3abac5930624581093478..0000000000000000000000000000000000000000 --- a/checkpoint-15000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79867c9c2c68c84cec8929323f22483c37396d62cbd76ddf28f56bcaae4d084e -size 14503 diff --git a/checkpoint-15000/scheduler.pt b/checkpoint-15000/scheduler.pt deleted file mode 100644 index 8d3d39ad4b2183a90165fa1731ec456fda1ee5c9..0000000000000000000000000000000000000000 --- a/checkpoint-15000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:301727affc0c0a4c1f25106f7fd12c059ede0526ba52733c25be949ad3bc04d7 -size 623 diff --git a/checkpoint-15000/special_tokens_map.json b/checkpoint-15000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-15000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-15000/tokenizer.json b/checkpoint-15000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-15000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-15000/tokenizer_config.json b/checkpoint-15000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-15000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", 
"add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-15000/trainer_state.json b/checkpoint-15000/trainer_state.json deleted file mode 100644 index 54a0309fd000af1093bd68571ed6944f0e22223b..0000000000000000000000000000000000000000 --- a/checkpoint-15000/trainer_state.json +++ /dev/null @@ -1,76 +0,0 @@ -{ - "best_metric": 3.59525203704834, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-15000", - "epoch": 108.69189189189188, - "global_step": 15000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 18.12, - "learning_rate": 9e-05, - "loss": 4.2544, - "step": 2500 - }, - { - "epoch": 36.23, - "learning_rate": 8e-05, - "loss": 3.9123, - "step": 5000 - }, - { - "epoch": 36.23, - "eval_loss": 3.843057632446289, - "eval_runtime": 513.4541, - "eval_samples_per_second": 9.555, - "eval_steps_per_second": 4.777, - "step": 5000 - }, - { - "epoch": 54.35, - "learning_rate": 7e-05, - "loss": 3.7584, - "step": 7500 - }, - { - "epoch": 72.46, - "learning_rate": 6e-05, - "loss": 3.6674, - "step": 10000 - }, - { - "epoch": 72.46, - "eval_loss": 3.6707117557525635, - "eval_runtime": 512.9035, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 10000 - }, - { - "epoch": 90.58, - "learning_rate": 5e-05, - "loss": 3.6049, - "step": 12500 - }, - { - "epoch": 108.69, - "learning_rate": 4e-05, - "loss": 3.5633, - "step": 15000 - }, - { - "epoch": 108.69, - "eval_loss": 3.59525203704834, - "eval_runtime": 512.888, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 15000 - } - ], - "max_steps": 25000, - "num_train_epochs": 182, - "total_flos": 2.2409328088999526e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-15000/training_args.bin b/checkpoint-15000/training_args.bin deleted file mode 100644 index d1628de4c6c12d20f7aa9b4f9eeef9664f72f107..0000000000000000000000000000000000000000 --- a/checkpoint-15000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:511eb83bbd6787b86308ce4eb4e32d6cc27314d329c212c555d84efc6ad6cd39 -size 3375 diff --git a/checkpoint-20000/config.json b/checkpoint-20000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-20000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - 
"use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-20000/optimizer.pt b/checkpoint-20000/optimizer.pt deleted file mode 100644 index c926489a5e78a82bdd70c4ce29da63504c297504..0000000000000000000000000000000000000000 --- a/checkpoint-20000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6508d99bdbf164e3ed4c65d681d66d653ae7725ccf2e75ff4eb2967cd0c53f6b -size 2254269 diff --git a/checkpoint-20000/pytorch_model.bin b/checkpoint-20000/pytorch_model.bin deleted file mode 100644 index 496a683de64010c128b3baec8ff580c95fd93485..0000000000000000000000000000000000000000 --- a/checkpoint-20000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:feba206df7956295281ddf29ea9d10e1f1a992f6d424767c5806167a67f6ebb3 -size 2236955191 diff --git a/checkpoint-20000/rng_state.pth b/checkpoint-20000/rng_state.pth deleted file mode 100644 index e7ec7ab711abe8b65b520d1c9ccbf14117bbce02..0000000000000000000000000000000000000000 --- a/checkpoint-20000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5e875fdc7824505ea9288db113abb6ec776202fcd98ab6260922a41311f22d85 -size 14503 diff --git a/checkpoint-20000/scheduler.pt b/checkpoint-20000/scheduler.pt deleted file mode 100644 index 0fce858fc59e1c04346ec17a91eea84ca7634ec2..0000000000000000000000000000000000000000 --- a/checkpoint-20000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:69341a1831197b0345e31eaac56abf9ad4527cc56eba4b526818b4ffb6ef6dad -size 623 diff --git a/checkpoint-20000/special_tokens_map.json b/checkpoint-20000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-20000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-20000/tokenizer.json b/checkpoint-20000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-20000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-20000/tokenizer_config.json b/checkpoint-20000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-20000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-20000/trainer_state.json b/checkpoint-20000/trainer_state.json deleted file mode 100644 index 21734199f98cc365a79dd3389838124bf48c5777..0000000000000000000000000000000000000000 --- a/checkpoint-20000/trainer_state.json +++ /dev/null @@ -1,96 +0,0 @@ -{ - "best_metric": 3.559532403945923, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-20000", - "epoch": 144.9225225225225, - "global_step": 20000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - 
"is_world_process_zero": true, - "log_history": [ - { - "epoch": 18.12, - "learning_rate": 9e-05, - "loss": 4.2544, - "step": 2500 - }, - { - "epoch": 36.23, - "learning_rate": 8e-05, - "loss": 3.9123, - "step": 5000 - }, - { - "epoch": 36.23, - "eval_loss": 3.843057632446289, - "eval_runtime": 513.4541, - "eval_samples_per_second": 9.555, - "eval_steps_per_second": 4.777, - "step": 5000 - }, - { - "epoch": 54.35, - "learning_rate": 7e-05, - "loss": 3.7584, - "step": 7500 - }, - { - "epoch": 72.46, - "learning_rate": 6e-05, - "loss": 3.6674, - "step": 10000 - }, - { - "epoch": 72.46, - "eval_loss": 3.6707117557525635, - "eval_runtime": 512.9035, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 10000 - }, - { - "epoch": 90.58, - "learning_rate": 5e-05, - "loss": 3.6049, - "step": 12500 - }, - { - "epoch": 108.69, - "learning_rate": 4e-05, - "loss": 3.5633, - "step": 15000 - }, - { - "epoch": 108.69, - "eval_loss": 3.59525203704834, - "eval_runtime": 512.888, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 15000 - }, - { - "epoch": 126.81, - "learning_rate": 3e-05, - "loss": 3.5333, - "step": 17500 - }, - { - "epoch": 144.92, - "learning_rate": 2e-05, - "loss": 3.5125, - "step": 20000 - }, - { - "epoch": 144.92, - "eval_loss": 3.559532403945923, - "eval_runtime": 512.8641, - "eval_samples_per_second": 9.566, - "eval_steps_per_second": 4.783, - "step": 20000 - } - ], - "max_steps": 25000, - "num_train_epochs": 182, - "total_flos": 2.9879104118666035e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-20000/training_args.bin b/checkpoint-20000/training_args.bin deleted file mode 100644 index d1628de4c6c12d20f7aa9b4f9eeef9664f72f107..0000000000000000000000000000000000000000 --- a/checkpoint-20000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:511eb83bbd6787b86308ce4eb4e32d6cc27314d329c212c555d84efc6ad6cd39 -size 3375 diff --git a/checkpoint-2500/config.json b/checkpoint-2500/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-2500/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-2500/optimizer.pt b/checkpoint-2500/optimizer.pt deleted file mode 100644 index 16ac96f8bfa62a85699e2a6ca6ff0b9eec2bc0a0..0000000000000000000000000000000000000000 --- a/checkpoint-2500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:34b63ec8fba9f45cc23f17cd477edb607fe687bbf6970408c8b1d40a6d7bd58e -size 2254269 diff --git 
a/checkpoint-2500/pytorch_model.bin b/checkpoint-2500/pytorch_model.bin deleted file mode 100644 index 80c0463943e01a55f344c2636f67ad2f4cb098e4..0000000000000000000000000000000000000000 --- a/checkpoint-2500/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:993f48728e40f168a6fa706bc86e083f7ebc9f3dbca70913e924835ded1b311c -size 2236955191 diff --git a/checkpoint-2500/rng_state.pth b/checkpoint-2500/rng_state.pth deleted file mode 100644 index 17e114315addd3b9d99707b37203994031fdd092..0000000000000000000000000000000000000000 --- a/checkpoint-2500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:528887aeaf571c1dd9d1789c0fad11e336830c7f10d9174d25b3f236cf9a2aa4 -size 14503 diff --git a/checkpoint-2500/scheduler.pt b/checkpoint-2500/scheduler.pt deleted file mode 100644 index 39f754e4e40932a231da06fd74e846d4e0c1c2a3..0000000000000000000000000000000000000000 --- a/checkpoint-2500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8848be77d5e16f8ad560a7262091b3d3fcd8d0f3fa50682054480c93bc684fe6 -size 623 diff --git a/checkpoint-2500/special_tokens_map.json b/checkpoint-2500/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-2500/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-2500/tokenizer.json b/checkpoint-2500/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-2500/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-2500/tokenizer_config.json b/checkpoint-2500/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-2500/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-2500/trainer_state.json b/checkpoint-2500/trainer_state.json deleted file mode 100644 index 2451e366573dd7b3e79e300f0898973fd01ee8be..0000000000000000000000000000000000000000 --- a/checkpoint-2500/trainer_state.json +++ /dev/null @@ -1,30 +0,0 @@ -{ - "best_metric": 3.457474708557129, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-2500", - "epoch": 18.115315315315314, - "global_step": 2500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 18.12, - "learning_rate": 0.0008, - "loss": 3.676, - "step": 2500 - }, - { - "epoch": 18.12, - "eval_loss": 3.457474708557129, - "eval_runtime": 529.7099, - "eval_samples_per_second": 9.262, - "eval_steps_per_second": 4.631, - "step": 2500 - } - ], - "max_steps": 12500, - "num_train_epochs": 91, - "total_flos": 3.734888014833254e+16, - "trial_name": null, - "trial_params": null -} diff --git 
a/checkpoint-2500/training_args.bin b/checkpoint-2500/training_args.bin deleted file mode 100644 index 8f9ccdb9ceae148427cb8811e8694d3ae87bf6a0..0000000000000000000000000000000000000000 --- a/checkpoint-2500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d03d7122884f8034582a568ae0aeb2a2c444d81b24122c204b152bc39ad0914e -size 3375 diff --git a/checkpoint-2500/wikiann-az-results.txt b/checkpoint-2500/wikiann-az-results.txt deleted file mode 100644 index bc5a9fe05790d456cad9c5c684b90cc9c844d672..0000000000000000000000000000000000000000 --- a/checkpoint-2500/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-2500 -[0.36397631660953567, 0.35372247465921003, 0.37106918238993714, 0.38925706313219394, 0.38565022421524664, 0.3947001394700139, 0.3738019169329074, 0.3856186112090236, 0.3696027633851468, 0.38164251207729466] -37.69 -1.20 -================================================== \ No newline at end of file diff --git a/checkpoint-25000/config.json b/checkpoint-25000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-25000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-25000/optimizer.pt b/checkpoint-25000/optimizer.pt deleted file mode 100644 index 9af8101ef7c8db01ffb1e537062b5a77bd3818f3..0000000000000000000000000000000000000000 --- a/checkpoint-25000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cef01a84bfdeb1a3ef9a082f98cb7b1805a53bd0904fd2c24f8f90f983d4e27a -size 2254269 diff --git a/checkpoint-25000/pytorch_model.bin b/checkpoint-25000/pytorch_model.bin deleted file mode 100644 index 52d5fe430f6e18effdef24032e09b3d04b782bbc..0000000000000000000000000000000000000000 --- a/checkpoint-25000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f588939d875ea720bb846e2c764c5d3e1f6951f1312a761719e631ef4ee451b -size 2236955191 diff --git a/checkpoint-25000/rng_state.pth b/checkpoint-25000/rng_state.pth deleted file mode 100644 index 545d6d70e4c775d8fcb406ffd3cf3cbd6fa6fc3b..0000000000000000000000000000000000000000 --- a/checkpoint-25000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:428eed5020046e6148390cb9ce274d191f67b399dc054ac476c7ee249acde2c0 -size 14503 diff --git a/checkpoint-25000/scheduler.pt b/checkpoint-25000/scheduler.pt deleted file mode 100644 index 7ad3b44dd75ce7d8d5e7e1e604001842c0cc94ff..0000000000000000000000000000000000000000 --- a/checkpoint-25000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d68cbb33fa1a2e013758e6ff8a1c4cb984da09f9cb40986c80de7fb857fd18f -size 623 diff --git a/checkpoint-25000/special_tokens_map.json b/checkpoint-25000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-25000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-25000/tokenizer.json b/checkpoint-25000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-25000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-25000/tokenizer_config.json b/checkpoint-25000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-25000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-25000/trainer_state.json b/checkpoint-25000/trainer_state.json deleted file mode 100644 index f057f170a228408a9560ae7eeca9a920a50c967b..0000000000000000000000000000000000000000 --- a/checkpoint-25000/trainer_state.json +++ /dev/null @@ -1,116 +0,0 @@ -{ - "best_metric": 3.5487985610961914, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-25000", - "epoch": 181.15855855855855, - "global_step": 25000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 18.12, - "learning_rate": 9e-05, - "loss": 4.2544, - "step": 2500 - }, - { - "epoch": 36.23, - "learning_rate": 8e-05, - "loss": 3.9123, - "step": 5000 - }, - { - "epoch": 36.23, - "eval_loss": 3.843057632446289, - "eval_runtime": 513.4541, - "eval_samples_per_second": 9.555, - "eval_steps_per_second": 4.777, - "step": 5000 - }, - { - "epoch": 54.35, - "learning_rate": 7e-05, - "loss": 3.7584, - "step": 7500 - }, - { - "epoch": 72.46, - "learning_rate": 6e-05, - "loss": 3.6674, - "step": 10000 - }, - { - "epoch": 72.46, - "eval_loss": 3.6707117557525635, - "eval_runtime": 512.9035, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 10000 - }, - { - "epoch": 90.58, - "learning_rate": 5e-05, - "loss": 3.6049, - "step": 12500 - }, - { - "epoch": 108.69, - "learning_rate": 4e-05, - "loss": 3.5633, - "step": 15000 - }, - { - "epoch": 108.69, - "eval_loss": 3.59525203704834, - "eval_runtime": 512.888, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 15000 - }, - { - "epoch": 126.81, - "learning_rate": 3e-05, - "loss": 3.5333, - 
"step": 17500 - }, - { - "epoch": 144.92, - "learning_rate": 2e-05, - "loss": 3.5125, - "step": 20000 - }, - { - "epoch": 144.92, - "eval_loss": 3.559532403945923, - "eval_runtime": 512.8641, - "eval_samples_per_second": 9.566, - "eval_steps_per_second": 4.783, - "step": 20000 - }, - { - "epoch": 163.04, - "learning_rate": 1e-05, - "loss": 3.501, - "step": 22500 - }, - { - "epoch": 181.16, - "learning_rate": 0.0, - "loss": 3.493, - "step": 25000 - }, - { - "epoch": 181.16, - "eval_loss": 3.5487985610961914, - "eval_runtime": 512.9062, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 25000 - } - ], - "max_steps": 25000, - "num_train_epochs": 182, - "total_flos": 3.7349994596715725e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-25000/training_args.bin b/checkpoint-25000/training_args.bin deleted file mode 100644 index d1628de4c6c12d20f7aa9b4f9eeef9664f72f107..0000000000000000000000000000000000000000 --- a/checkpoint-25000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:511eb83bbd6787b86308ce4eb4e32d6cc27314d329c212c555d84efc6ad6cd39 -size 3375 diff --git a/checkpoint-5000/config.json b/checkpoint-5000/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-5000/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-5000/optimizer.pt b/checkpoint-5000/optimizer.pt deleted file mode 100644 index 76a9eaaf9ba9699a0e30245d1f9b78ee1c32a06f..0000000000000000000000000000000000000000 --- a/checkpoint-5000/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1824ec1ba1fa4e07ee10f70a84b7c5629a19b90a4b0e55f8c4290487bff1e2af -size 2254269 diff --git a/checkpoint-5000/pytorch_model.bin b/checkpoint-5000/pytorch_model.bin deleted file mode 100644 index 8ff37d855eff22e2dea324666510beadb5d80098..0000000000000000000000000000000000000000 --- a/checkpoint-5000/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:185ff2f781bcf56f6d891e93c0d1e1068506a122f90a8ba7150c78d50746019e -size 2236955191 diff --git a/checkpoint-5000/rng_state.pth b/checkpoint-5000/rng_state.pth deleted file mode 100644 index 30eb066861b30bb66346248b9d62920b37a37280..0000000000000000000000000000000000000000 --- a/checkpoint-5000/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:082457f986b0d1ef96c8f53996c405814810d2873a182b13766a4eb141b6cfeb -size 14503 diff --git 
a/checkpoint-5000/scheduler.pt b/checkpoint-5000/scheduler.pt deleted file mode 100644 index 90bfa33aa2e57caff6083bf68c3b38db47518ccd..0000000000000000000000000000000000000000 --- a/checkpoint-5000/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d4841973343260138ab6da195f6b9590db0a8465a2275f311ddcc72346193a56 -size 623 diff --git a/checkpoint-5000/special_tokens_map.json b/checkpoint-5000/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-5000/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/checkpoint-5000/tokenizer.json b/checkpoint-5000/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-5000/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-5000/tokenizer_config.json b/checkpoint-5000/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-5000/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-5000/trainer_state.json b/checkpoint-5000/trainer_state.json deleted file mode 100644 index 78dd894b85e6ea1d0076e37a4d570f8016058807..0000000000000000000000000000000000000000 --- a/checkpoint-5000/trainer_state.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "best_metric": 3.843057632446289, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-5000", - "epoch": 36.23063063063063, - "global_step": 5000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 18.12, - "learning_rate": 9e-05, - "loss": 4.2544, - "step": 2500 - }, - { - "epoch": 36.23, - "learning_rate": 8e-05, - "loss": 3.9123, - "step": 5000 - }, - { - "epoch": 36.23, - "eval_loss": 3.843057632446289, - "eval_runtime": 513.4541, - "eval_samples_per_second": 9.555, - "eval_steps_per_second": 4.777, - "step": 5000 - } - ], - "max_steps": 25000, - "num_train_epochs": 182, - "total_flos": 7.469776029666509e+16, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin deleted file mode 100644 index d1628de4c6c12d20f7aa9b4f9eeef9664f72f107..0000000000000000000000000000000000000000 --- a/checkpoint-5000/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:511eb83bbd6787b86308ce4eb4e32d6cc27314d329c212c555d84efc6ad6cd39 -size 3375 diff --git a/checkpoint-5000/wikiann-az-results.txt b/checkpoint-5000/wikiann-az-results.txt deleted file mode 100644 index 58bbbf3a369c85df4645493fbce95acc95f94507..0000000000000000000000000000000000000000 --- a/checkpoint-5000/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results 
-================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-5000 -[0.3755703755703756, 0.36724738675958185, 0.3752199929602253, 0.3942375263527758, 0.3862212943632567, 0.39485575251998606, 0.37590861889927313, 0.3730291848373029, 0.3608815426997245, 0.3855001742767515] -37.89 -1.06 -================================================== \ No newline at end of file diff --git a/checkpoint-7500/config.json b/checkpoint-7500/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/checkpoint-7500/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/checkpoint-7500/optimizer.pt b/checkpoint-7500/optimizer.pt deleted file mode 100644 index 047a43647e55d83a9fa9a86b3687c1fafb50e1cc..0000000000000000000000000000000000000000 --- a/checkpoint-7500/optimizer.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:09b27ac09b6eba24bb36296b2cd7b10a8c52ac03372ce9ab64efa03343f3305c -size 2254269 diff --git a/checkpoint-7500/pytorch_model.bin b/checkpoint-7500/pytorch_model.bin deleted file mode 100644 index a374ea2fa797d0d51b57529f89a03175c11a2774..0000000000000000000000000000000000000000 --- a/checkpoint-7500/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d188ee24de004d455a09caa32d98b0ee122f1834fc7227fc9592d65c4fa8553f -size 2236955191 diff --git a/checkpoint-7500/rng_state.pth b/checkpoint-7500/rng_state.pth deleted file mode 100644 index 5a64a99531515dc6c71b2165b0f278fc38b83ea6..0000000000000000000000000000000000000000 --- a/checkpoint-7500/rng_state.pth +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:86f15ebe067d98a0b183db61960f98bf4e1c7cd13f6bed2881f3d3b7f362f182 -size 14503 diff --git a/checkpoint-7500/scheduler.pt b/checkpoint-7500/scheduler.pt deleted file mode 100644 index 92a7351ba15122d445557f2ca342b5c2a8f66242..0000000000000000000000000000000000000000 --- a/checkpoint-7500/scheduler.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3645d67727f305ed9f285de92859169b9ced76729a97a8143c6ece6d98a161d3 -size 623 diff --git a/checkpoint-7500/special_tokens_map.json b/checkpoint-7500/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/checkpoint-7500/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", 
"pad_token": ""} \ No newline at end of file diff --git a/checkpoint-7500/tokenizer.json b/checkpoint-7500/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/checkpoint-7500/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/checkpoint-7500/tokenizer_config.json b/checkpoint-7500/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/checkpoint-7500/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/checkpoint-7500/trainer_state.json b/checkpoint-7500/trainer_state.json deleted file mode 100644 index 544fdbbc7cd523e3c0191e7322d9b76f49f42324..0000000000000000000000000000000000000000 --- a/checkpoint-7500/trainer_state.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "best_metric": 3.292879581451416, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-7500", - "epoch": 54.34594594594594, - "global_step": 7500, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 18.12, - "learning_rate": 0.0008, - "loss": 3.676, - "step": 2500 - }, - { - "epoch": 18.12, - "eval_loss": 3.457474708557129, - "eval_runtime": 529.7099, - "eval_samples_per_second": 9.262, - "eval_steps_per_second": 4.631, - "step": 2500 - }, - { - "epoch": 36.23, - "learning_rate": 0.0006, - "loss": 3.3098, - "step": 5000 - }, - { - "epoch": 36.23, - "eval_loss": 3.3395490646362305, - "eval_runtime": 529.196, - "eval_samples_per_second": 9.271, - "eval_steps_per_second": 4.635, - "step": 5000 - }, - { - "epoch": 54.35, - "learning_rate": 0.0004, - "loss": 3.2132, - "step": 7500 - }, - { - "epoch": 54.35, - "eval_loss": 3.292879581451416, - "eval_runtime": 528.2545, - "eval_samples_per_second": 9.287, - "eval_steps_per_second": 4.644, - "step": 7500 - } - ], - "max_steps": 12500, - "num_train_epochs": 91, - "total_flos": 1.1204664044499763e+17, - "trial_name": null, - "trial_params": null -} diff --git a/checkpoint-7500/training_args.bin b/checkpoint-7500/training_args.bin deleted file mode 100644 index 8f9ccdb9ceae148427cb8811e8694d3ae87bf6a0..0000000000000000000000000000000000000000 --- a/checkpoint-7500/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d03d7122884f8034582a568ae0aeb2a2c444d81b24122c204b152bc39ad0914e -size 3375 diff --git a/checkpoint-7500/wikiann-az-results.txt b/checkpoint-7500/wikiann-az-results.txt deleted file mode 100644 index 2905305f68c4e66a9e4b60812580b138241bd26e..0000000000000000000000000000000000000000 --- a/checkpoint-7500/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results -================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-7500 -[0.36720392431674836, 0.36719022687609065, 0.37202797202797205, 0.3932584269662921, 
0.38228113010115106, 0.3930191972076789, 0.37373737373737376, 0.3661278988053408, 0.3737244897959183, 0.3873684210526316] -37.76 -1.00 -================================================== \ No newline at end of file diff --git a/config.json b/config.json deleted file mode 100644 index 01102e03e4779d2c43cb80a51c8459e256eba4ce..0000000000000000000000000000000000000000 --- a/config.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "_name_or_path": "bigscience/bloom-350m", - "adapters": { - "adapters": {}, - "config_map": {}, - "fusion_config_map": {}, - "fusions": {} - }, - "apply_residual_connection_post_layernorm": false, - "architectures": [ - "BloomForCausalLM" - ], - "attention_dropout": 0.0, - "attention_softmax_in_fp32": true, - "bias_dropout_fusion": true, - "bos_token_id": 1, - "eos_token_id": 2, - "hidden_dropout": 0.0, - "hidden_size": 1024, - "initializer_range": 0.02, - "layer_norm_epsilon": 1e-05, - "masked_softmax_fusion": true, - "model_type": "bloom", - "n_head": 16, - "n_inner": null, - "n_layer": 24, - "offset_alibi": 100, - "pad_token_id": 3, - "pretraining_tp": 1, - "seq_length": 2048, - "skip_bias_add": true, - "skip_bias_add_qkv": false, - "slow_but_exact": false, - "torch_dtype": "float32", - "transformers_version": "4.20.0.dev0", - "unk_token_id": 0, - "use_cache": true, - "vocab_size": 250880 -} diff --git a/eval_results.json b/eval_results.json deleted file mode 100644 index 525d32fbce4f0910f5a0db635c6f19b3dbc75a69..0000000000000000000000000000000000000000 --- a/eval_results.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "epoch": 181.16, - "eval_loss": 3.5487985610961914, - "eval_runtime": 512.7811, - "eval_samples": 4906, - "eval_samples_per_second": 9.567, - "eval_steps_per_second": 4.784, - "perplexity": 34.7715165292787 -} \ No newline at end of file diff --git a/pytorch_model.bin b/pytorch_model.bin deleted file mode 100644 index 52d5fe430f6e18effdef24032e09b3d04b782bbc..0000000000000000000000000000000000000000 --- a/pytorch_model.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2f588939d875ea720bb846e2c764c5d3e1f6951f1312a761719e631ef4ee451b -size 2236955191 diff --git a/special_tokens_map.json b/special_tokens_map.json deleted file mode 100644 index 25bc39604f72700b3b8e10bd69bb2f227157edd1..0000000000000000000000000000000000000000 --- a/special_tokens_map.json +++ /dev/null @@ -1 +0,0 @@ -{"bos_token": "", "eos_token": "", "unk_token": "", "pad_token": ""} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json deleted file mode 100644 index a4fa803e0e0e614382d27635cc90df38f96f41f0..0000000000000000000000000000000000000000 --- a/tokenizer.json +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f6efc66e73f1fd69da4f436e48befb519fdff3fe18910850c1d41bd862293a5 -size 14500443 diff --git a/tokenizer_config.json b/tokenizer_config.json deleted file mode 100644 index 149b7da28b1ab9fe6defc5878e01cdb16f445e11..0000000000000000000000000000000000000000 --- a/tokenizer_config.json +++ /dev/null @@ -1 +0,0 @@ -{"unk_token": "", "bos_token": "", "eos_token": "", "pad_token": "", "add_prefix_space": false, "name_or_path": "bigscience/bloom-350m", "special_tokens_map_file": null, "padding_side": "left", "tokenizer_class": "BloomTokenizer"} \ No newline at end of file diff --git a/train_results.json b/train_results.json deleted file mode 100644 index 54d482ca8df1c3160b78272ce41d604f8c46494d..0000000000000000000000000000000000000000 --- a/train_results.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "epoch": 
181.16, - "train_loss": 3.6800425, - "train_runtime": 52880.0792, - "train_samples": 1000, - "train_samples_per_second": 3.782, - "train_steps_per_second": 0.473 -} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json deleted file mode 100644 index 78ec0a110aae72bc050f6cfcf2fbd475e6b8dcb8..0000000000000000000000000000000000000000 --- a/trainer_state.json +++ /dev/null @@ -1,125 +0,0 @@ -{ - "best_metric": 3.5487985610961914, - "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/checkpoint-25000", - "epoch": 181.15855855855855, - "global_step": 25000, - "is_hyper_param_search": false, - "is_local_process_zero": true, - "is_world_process_zero": true, - "log_history": [ - { - "epoch": 18.12, - "learning_rate": 9e-05, - "loss": 4.2544, - "step": 2500 - }, - { - "epoch": 36.23, - "learning_rate": 8e-05, - "loss": 3.9123, - "step": 5000 - }, - { - "epoch": 36.23, - "eval_loss": 3.843057632446289, - "eval_runtime": 513.4541, - "eval_samples_per_second": 9.555, - "eval_steps_per_second": 4.777, - "step": 5000 - }, - { - "epoch": 54.35, - "learning_rate": 7e-05, - "loss": 3.7584, - "step": 7500 - }, - { - "epoch": 72.46, - "learning_rate": 6e-05, - "loss": 3.6674, - "step": 10000 - }, - { - "epoch": 72.46, - "eval_loss": 3.6707117557525635, - "eval_runtime": 512.9035, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 10000 - }, - { - "epoch": 90.58, - "learning_rate": 5e-05, - "loss": 3.6049, - "step": 12500 - }, - { - "epoch": 108.69, - "learning_rate": 4e-05, - "loss": 3.5633, - "step": 15000 - }, - { - "epoch": 108.69, - "eval_loss": 3.59525203704834, - "eval_runtime": 512.888, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 15000 - }, - { - "epoch": 126.81, - "learning_rate": 3e-05, - "loss": 3.5333, - "step": 17500 - }, - { - "epoch": 144.92, - "learning_rate": 2e-05, - "loss": 3.5125, - "step": 20000 - }, - { - "epoch": 144.92, - "eval_loss": 3.559532403945923, - "eval_runtime": 512.8641, - "eval_samples_per_second": 9.566, - "eval_steps_per_second": 4.783, - "step": 20000 - }, - { - "epoch": 163.04, - "learning_rate": 1e-05, - "loss": 3.501, - "step": 22500 - }, - { - "epoch": 181.16, - "learning_rate": 0.0, - "loss": 3.493, - "step": 25000 - }, - { - "epoch": 181.16, - "eval_loss": 3.5487985610961914, - "eval_runtime": 512.9062, - "eval_samples_per_second": 9.565, - "eval_steps_per_second": 4.783, - "step": 25000 - }, - { - "epoch": 181.16, - "step": 25000, - "total_flos": 3.7349994596715725e+17, - "train_loss": 3.6800425, - "train_runtime": 52880.0792, - "train_samples_per_second": 3.782, - "train_steps_per_second": 0.473 - } - ], - "max_steps": 25000, - "num_train_epochs": 182, - "total_flos": 3.7349994596715725e+17, - "trial_name": null, - "trial_params": null -} diff --git a/training_args.bin b/training_args.bin deleted file mode 100644 index d1628de4c6c12d20f7aa9b4f9eeef9664f72f107..0000000000000000000000000000000000000000 --- a/training_args.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:511eb83bbd6787b86308ce4eb4e32d6cc27314d329c212c555d84efc6ad6cd39 -size 3375 diff --git a/wikiann-az-results.txt b/wikiann-az-results.txt deleted file mode 100644 index 9fbd409c86ffdac937c7636b1f9ff714bde3fd46..0000000000000000000000000000000000000000 --- a/wikiann-az-results.txt +++ /dev/null @@ -1,8 +0,0 @@ -================================================== -Results 
-================================================== -Model: /users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-350m_az_bitfit_1000samples_-1vocab_original-frozen/ -[0.06267515923566878, 0.011998154130133826, 0.040195341848234416, 0.0903098220171391, 0.08333333333333334, 0.04033884630899556, 0.05261744966442953, 0.072714916751614, 0.07329842931937172, 0.13276836158192093] -6.60 -3.14 -================================================== \ No newline at end of file diff --git a/word_embeddings.pt b/word_embeddings.pt deleted file mode 100644 index 2a4cbda005e5d2ade008b1110876ca6a87706b17..0000000000000000000000000000000000000000 --- a/word_embeddings.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:174ed618237771e5906be0e8d70c568de63633f3bb5e8a1e303bbdbaeaedc1ca -size 1027605867 diff --git a/word_embeddings_layernorm.pt b/word_embeddings_layernorm.pt deleted file mode 100644 index 1b8759b2a378472c0f17a4292a2a6276b8a3e07e..0000000000000000000000000000000000000000 --- a/word_embeddings_layernorm.pt +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:10917f86841a4f322406bd72ba2e4ae8e4780aaf462c98a76eca01e0c5fbc893 -size 9703
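Note on the deleted metrics: eval_results.json above records both "eval_loss" (3.5487985610961914) and "perplexity" (34.7715165292787) for this BitFit run, and the two are consistent because perplexity is simply exp(eval_loss), the convention used by the Hugging Face causal-LM evaluation scripts. A minimal sketch of that check, assuming a local copy of the deleted eval_results.json (the file path is illustrative, not part of the repository history):

    import json
    import math

    # Load a local copy of the deleted eval_results.json (illustrative path).
    with open("eval_results.json") as f:
        results = json.load(f)

    # Perplexity of a causal LM is exp of the mean cross-entropy eval loss.
    perplexity = math.exp(results["eval_loss"])

    # Matches the stored value: exp(3.5487985610961914) ~= 34.7715165292787
    assert math.isclose(perplexity, results["perplexity"], rel_tol=1e-6)
    print(f"eval_loss={results['eval_loss']:.4f}  perplexity={perplexity:.2f}")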