0xsuid committed on
Commit
0a384a0
•
1 Parent(s): 6c7a1dc

Upload experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154

Browse files
experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154/configs.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dir": "./results",
3
+ "evaluation_strategy": "no",
4
+ "do_eval": false,
5
+ "eval_steps": 0,
6
+ "log_level": "error",
7
+ "logging_first_step": true,
8
+ "logging_steps": 5,
9
+ "logging_dir": "./logs",
10
+ "save_steps": 200,
11
+ "save_total_limit": 2,
12
+ "num_train_epochs": 5,
13
+ "per_device_train_batch_size": 6,
14
+ "optim": "adamw_torch",
15
+ "dataloader_drop_last": true,
16
+ "warmup_steps": 500,
17
+ "weight_decay": 0.05,
18
+ "learning_rate": 5e-05,
19
+ "local_rank": -1,
20
+ "deepspeed": "deepspeed.json",
21
+ "total_gpus": 2,
22
+ "v_cpus": 16,
23
+ "total_memory_in_gb": 48190.40625,
24
+ "dataset_limit": 10
25
+ }
experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154/deepspeed.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "optimizer": {
3
+ "type": "AdamW",
4
+ "params": {
5
+ "lr": "auto",
6
+ "betas": "auto",
7
+ "eps": "auto",
8
+ "weight_decay": "auto"
9
+ }
10
+ },
11
+
12
+ "scheduler": {
13
+ "type": "WarmupLR",
14
+ "params": {
15
+ "warmup_min_lr": "auto",
16
+ "warmup_max_lr": "auto",
17
+ "warmup_num_steps": "auto"
18
+ }
19
+ },
20
+
21
+ "zero_optimization": {
22
+ "stage": 2,
23
+ "offload_optimizer": {
24
+ "device": "cpu",
25
+ "pin_memory": true
26
+ },
27
+ "allgather_partitions": true,
28
+ "allgather_bucket_size": 5e8,
29
+ "overlap_comm": true,
30
+ "reduce_scatter": true,
31
+ "reduce_bucket_size": 5e8,
32
+ "contiguous_gradients": true
33
+ },
34
+
35
+ "tensorboard": {
36
+ "enabled": true,
37
+ "output_path": "logs/",
38
+ "job_name": "train_neo"
39
+ },
40
+
41
+ "zero_allow_untested_optimizer": true,
42
+ "gradient_accumulation_steps": "auto",
43
+ "gradient_clipping": "auto",
44
+ "steps_per_print": 2000,
45
+ "train_batch_size": "auto",
46
+ "train_micro_batch_size_per_gpu": "auto",
47
+ "wall_clock_breakdown": false
48
+ }
experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154/final_checkpoint/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "EleutherAI/gpt-neo-125M",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPTNeoForCausalLM"
6
+ ],
7
+ "attention_dropout": 0,
8
+ "attention_layers": [
9
+ "global",
10
+ "local",
11
+ "global",
12
+ "local",
13
+ "global",
14
+ "local",
15
+ "global",
16
+ "local",
17
+ "global",
18
+ "local",
19
+ "global",
20
+ "local"
21
+ ],
22
+ "attention_types": [
23
+ [
24
+ [
25
+ "global",
26
+ "local"
27
+ ],
28
+ 6
29
+ ]
30
+ ],
31
+ "bos_token_id": 50256,
32
+ "embed_dropout": 0,
33
+ "eos_token_id": 50256,
34
+ "gradient_checkpointing": false,
35
+ "hidden_size": 768,
36
+ "initializer_range": 0.02,
37
+ "intermediate_size": null,
38
+ "layer_norm_epsilon": 1e-05,
39
+ "max_position_embeddings": 2048,
40
+ "model_type": "gpt_neo",
41
+ "num_heads": 12,
42
+ "num_layers": 12,
43
+ "resid_dropout": 0,
44
+ "summary_activation": null,
45
+ "summary_first_dropout": 0.1,
46
+ "summary_proj_to_labels": true,
47
+ "summary_type": "cls_index",
48
+ "summary_use_proj": true,
49
+ "torch_dtype": "float32",
50
+ "transformers_version": "4.25.1",
51
+ "use_cache": true,
52
+ "vocab_size": 50257,
53
+ "window_size": 256
54
+ }
experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154/final_checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde597b4e4371138cd8adebf86513a8ce07063c528179c0ff6330eec2445d28c
3
+ size 551154684
experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154/logs/1671446773.5511963/events.out.tfevents.1671446773.7459dae3-a471-4828-975b-f8e23ae7ab31.144520.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5060c28b0d5829c518e342be88110a091f75478b7ad87737f11a66b34dd0c45
3
+ size 5457
experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154/logs/events.out.tfevents.1671446773.7459dae3-a471-4828-975b-f8e23ae7ab31.144520.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:874b6b4200d756fc7fdf469432978b3ee219e37e458298358aad97a799ba509e
3
+ size 6266
experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154/logs/train_neo/events.out.tfevents.1671446763.7459dae3-a471-4828-975b-f8e23ae7ab31.144520.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a83957c6b655d9a4f65836a5b53c5bd8bbb3ab86b44400e2035d689b89de85d
3
+ size 6507
experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154/output.log ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/55 [00:00<?, ?it/s]
1
  2%|▏ | 1/55 [00:01<01:13, 1.37s/it]
2
 
 
3
  2%|▏ | 1/55 [00:01<01:13, 1.37s/it]
4
  4%|β–Ž | 2/55 [00:02<01:13, 1.39s/it]
5
  5%|β–Œ | 3/55 [00:04<01:13, 1.42s/it]
6
  7%|β–‹ | 4/55 [00:05<01:12, 1.42s/it]
7
  9%|β–‰ | 5/55 [00:07<01:10, 1.41s/it]
8
 
 
9
  9%|β–‰ | 5/55 [00:07<01:10, 1.41s/it]
10
  11%|β–ˆ | 6/55 [00:08<01:09, 1.41s/it]
11
  13%|β–ˆβ–Ž | 7/55 [00:09<01:07, 1.41s/it]
12
  15%|β–ˆβ– | 8/55 [00:11<01:06, 1.41s/it]
13
  16%|β–ˆβ–‹ | 9/55 [00:12<01:04, 1.41s/it]
14
  18%|β–ˆβ–Š | 10/55 [00:14<01:03, 1.41s/it]
15
 
 
16
  18%|β–ˆβ–Š | 10/55 [00:14<01:03, 1.41s/it]
17
  20%|β–ˆβ–ˆ | 11/55 [00:15<01:02, 1.41s/it]
18
  22%|β–ˆβ–ˆβ– | 12/55 [00:16<01:00, 1.41s/it]
19
  24%|β–ˆβ–ˆβ–Ž | 13/55 [00:18<00:59, 1.41s/it]
20
  25%|β–ˆβ–ˆβ–Œ | 14/55 [00:19<00:57, 1.41s/it]
21
  27%|β–ˆβ–ˆβ–‹ | 15/55 [00:21<00:56, 1.41s/it]
22
 
 
23
  27%|β–ˆβ–ˆβ–‹ | 15/55 [00:21<00:56, 1.41s/it]
24
  29%|β–ˆβ–ˆβ–‰ | 16/55 [00:22<00:55, 1.41s/it]
25
  31%|β–ˆβ–ˆβ–ˆ | 17/55 [00:23<00:53, 1.41s/it]
26
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 18/55 [00:25<00:52, 1.41s/it]
27
  35%|β–ˆβ–ˆβ–ˆβ– | 19/55 [00:26<00:50, 1.41s/it]
28
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 20/55 [00:28<00:49, 1.41s/it]
29
 
 
30
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 20/55 [00:28<00:49, 1.41s/it]
31
  38%|β–ˆβ–ˆβ–ˆβ–Š | 21/55 [00:29<00:48, 1.41s/it]
32
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 22/55 [00:31<00:46, 1.41s/it]
33
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 23/55 [00:32<00:45, 1.41s/it]
34
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 24/55 [00:33<00:43, 1.41s/it]
35
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 25/55 [00:35<00:42, 1.41s/it]
36
 
 
37
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 25/55 [00:35<00:42, 1.41s/it]
38
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 26/55 [00:36<00:40, 1.40s/it]
39
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 27/55 [00:38<00:39, 1.40s/it]
40
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 28/55 [00:39<00:37, 1.40s/it]
41
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 29/55 [00:40<00:36, 1.40s/it]
42
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 30/55 [00:42<00:35, 1.41s/it]
43
 
 
44
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 30/55 [00:42<00:35, 1.41s/it]
45
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 31/55 [00:43<00:33, 1.41s/it]
46
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 32/55 [00:45<00:32, 1.41s/it]
47
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 33/55 [00:46<00:30, 1.41s/it]
48
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 34/55 [00:47<00:29, 1.41s/it]
49
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 35/55 [00:49<00:28, 1.41s/it]
50
 
 
51
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 35/55 [00:49<00:28, 1.41s/it]
52
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 36/55 [00:50<00:26, 1.41s/it]
53
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 37/55 [00:52<00:25, 1.42s/it]
54
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 38/55 [00:53<00:24, 1.42s/it]
55
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 39/55 [00:55<00:22, 1.42s/it]
56
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 40/55 [00:56<00:21, 1.42s/it]
57
 
 
58
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 40/55 [00:56<00:21, 1.42s/it]
59
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/55 [00:57<00:19, 1.42s/it]
60
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 42/55 [00:59<00:18, 1.42s/it]
61
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 43/55 [01:00<00:17, 1.42s/it]
62
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 44/55 [01:02<00:15, 1.42s/it]
63
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 45/55 [01:03<00:14, 1.42s/it]
64
 
 
65
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 45/55 [01:03<00:14, 1.42s/it]
66
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 46/55 [01:04<00:12, 1.41s/it]
67
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 47/55 [01:06<00:11, 1.41s/it]
68
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 48/55 [01:07<00:09, 1.41s/it]
69
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 49/55 [01:09<00:08, 1.41s/it]
70
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 50/55 [01:10<00:07, 1.41s/it]
71
 
 
72
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 50/55 [01:10<00:07, 1.41s/it]
73
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 51/55 [01:11<00:05, 1.42s/it]
74
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 52/55 [01:13<00:04, 1.42s/it]
75
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 53/55 [01:14<00:02, 1.42s/it]
76
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 54/55 [01:16<00:01, 1.42s/it]
77
 
 
 
78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nohup: ignoring input
2
+ [2022-12-19 10:45:52,072] [WARNING] [runner.py:179:fetch_hostfile] Unable to find hostfile, will proceed with training with local resources only.
3
+ [2022-12-19 10:45:52,087] [INFO] [runner.py:508:main] cmd = /usr/bin/python3 -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMCwgMV19 --master_addr=127.0.0.1 --master_port=29500 tune_gpt.py --deepspeed deepspeed.json --limit=10 --local_rank=-1
4
+ [2022-12-19 10:45:53,665] [INFO] [launch.py:142:main] WORLD INFO DICT: {'localhost': [0, 1]}
5
+ [2022-12-19 10:45:53,665] [INFO] [launch.py:148:main] nnodes=1, num_local_procs=2, node_rank=0
6
+ [2022-12-19 10:45:53,665] [INFO] [launch.py:161:main] global_rank_mapping=defaultdict(<class 'list'>, {'localhost': [0, 1]})
7
+ [2022-12-19 10:45:53,665] [INFO] [launch.py:162:main] dist_world_size=2
8
+ [2022-12-19 10:45:53,665] [INFO] [launch.py:164:main] Setting CUDA_VISIBLE_DEVICES=0,1
9
+ No config specified, defaulting to: apps/all
10
+ Found cached dataset apps (/home/user/.cache/huggingface/datasets/codeparrot___apps/all/0.0.0/04ac807715d07d6e5cc580f59cdc8213cd7dc4529d0bb819cca72c9f8e8c1aa5)
11
+ Max length: 2048
12
+ PyTorch: setting up devices
13
+ [2022-12-19 10:46:03,625] [INFO] [comm.py:654:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
14
+ No config specified, defaulting to: apps/all
15
+ Found cached dataset apps (/home/user/.cache/huggingface/datasets/codeparrot___apps/all/0.0.0/04ac807715d07d6e5cc580f59cdc8213cd7dc4529d0bb819cca72c9f8e8c1aa5)
16
+ Max length: 2048
17
+ PyTorch: setting up devices
18
+ The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
19
+ GPU memory occupied: 3108 MB.
20
+ The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
21
+ GPU memory occupied: 3108 MB.
22
+ Using /home/user/.cache/torch_extensions/py38_cu116 as PyTorch extensions root...
23
+ Using /home/user/.cache/torch_extensions/py38_cu116 as PyTorch extensions root...
24
+ Detected CUDA files, patching ldflags
25
+ Emitting ninja build file /home/user/.cache/torch_extensions/py38_cu116/cpu_adam/build.ninja...
26
+ Building extension module cpu_adam...
27
+ Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
28
+ ninja: no work to do.
29
+ Loading extension module cpu_adam...
30
+ Time to load cpu_adam op: 2.6374411582946777 seconds
31
+ Loading extension module cpu_adam...
32
+ Time to load cpu_adam op: 2.604843854904175 seconds
33
+ Using /home/user/.cache/torch_extensions/py38_cu116 as PyTorch extensions root...
34
+ Using /home/user/.cache/torch_extensions/py38_cu116 as PyTorch extensions root...
35
+ Emitting ninja build file /home/user/.cache/torch_extensions/py38_cu116/utils/build.ninja...
36
+ Building extension module utils...
37
+ Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
38
+ ninja: no work to do.
39
+ Loading extension module utils...
40
+ Time to load utils op: 0.2902805805206299 seconds
41
+ Loading extension module utils...
42
+ Time to load utils op: 0.2025001049041748 seconds
43
+ Rank: 0 partition count [2] and sizes[(62599296, False)]
44
+ Rank: 1 partition count [2] and sizes[(62599296, False)]
45
+ Using /home/user/.cache/torch_extensions/py38_cu116 as PyTorch extensions root...
46
+ No modifications detected for re-loaded extension module utils, skipping build step...
47
+ Loading extension module utils...
48
+ Time to load utils op: 0.0009868144989013672 seconds
49
+ Using /home/user/.cache/torch_extensions/py38_cu116 as PyTorch extensions root...
50
+ No modifications detected for re-loaded extension module utils, skipping build step...
51
+ Loading extension module utils...
52
+ Time to load utils op: 0.0003800392150878906 seconds
53
+
54
  0%| | 0/55 [00:00<?, ?it/s]
55
  2%|▏ | 1/55 [00:01<01:13, 1.37s/it]
56
 
57
+
58
  2%|▏ | 1/55 [00:01<01:13, 1.37s/it]
59
  4%|β–Ž | 2/55 [00:02<01:13, 1.39s/it]
60
  5%|β–Œ | 3/55 [00:04<01:13, 1.42s/it]
61
  7%|β–‹ | 4/55 [00:05<01:12, 1.42s/it]
62
  9%|β–‰ | 5/55 [00:07<01:10, 1.41s/it]
63
 
64
+
65
  9%|β–‰ | 5/55 [00:07<01:10, 1.41s/it]
66
  11%|β–ˆ | 6/55 [00:08<01:09, 1.41s/it]
67
  13%|β–ˆβ–Ž | 7/55 [00:09<01:07, 1.41s/it]
68
  15%|β–ˆβ– | 8/55 [00:11<01:06, 1.41s/it]
69
  16%|β–ˆβ–‹ | 9/55 [00:12<01:04, 1.41s/it]
70
  18%|β–ˆβ–Š | 10/55 [00:14<01:03, 1.41s/it]
71
 
72
+
73
  18%|β–ˆβ–Š | 10/55 [00:14<01:03, 1.41s/it]
74
  20%|β–ˆβ–ˆ | 11/55 [00:15<01:02, 1.41s/it]
75
  22%|β–ˆβ–ˆβ– | 12/55 [00:16<01:00, 1.41s/it]
76
  24%|β–ˆβ–ˆβ–Ž | 13/55 [00:18<00:59, 1.41s/it]
77
  25%|β–ˆβ–ˆβ–Œ | 14/55 [00:19<00:57, 1.41s/it]
78
  27%|β–ˆβ–ˆβ–‹ | 15/55 [00:21<00:56, 1.41s/it]
79
 
80
+
81
  27%|β–ˆβ–ˆβ–‹ | 15/55 [00:21<00:56, 1.41s/it]
82
  29%|β–ˆβ–ˆβ–‰ | 16/55 [00:22<00:55, 1.41s/it]
83
  31%|β–ˆβ–ˆβ–ˆ | 17/55 [00:23<00:53, 1.41s/it]
84
  33%|β–ˆβ–ˆβ–ˆβ–Ž | 18/55 [00:25<00:52, 1.41s/it]
85
  35%|β–ˆβ–ˆβ–ˆβ– | 19/55 [00:26<00:50, 1.41s/it]
86
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 20/55 [00:28<00:49, 1.41s/it]
87
 
88
+
89
  36%|β–ˆβ–ˆβ–ˆβ–‹ | 20/55 [00:28<00:49, 1.41s/it]
90
  38%|β–ˆβ–ˆβ–ˆβ–Š | 21/55 [00:29<00:48, 1.41s/it]
91
  40%|β–ˆβ–ˆβ–ˆβ–ˆ | 22/55 [00:31<00:46, 1.41s/it]
92
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 23/55 [00:32<00:45, 1.41s/it]
93
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 24/55 [00:33<00:43, 1.41s/it]
94
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 25/55 [00:35<00:42, 1.41s/it]
95
 
96
+
97
  45%|β–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 25/55 [00:35<00:42, 1.41s/it]
98
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 26/55 [00:36<00:40, 1.40s/it]
99
  49%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 27/55 [00:38<00:39, 1.40s/it]
100
  51%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 28/55 [00:39<00:37, 1.40s/it]
101
  53%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 29/55 [00:40<00:36, 1.40s/it]
102
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 30/55 [00:42<00:35, 1.41s/it]
103
 
104
+
105
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 30/55 [00:42<00:35, 1.41s/it]
106
  56%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 31/55 [00:43<00:33, 1.41s/it]
107
  58%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 32/55 [00:45<00:32, 1.41s/it]
108
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 33/55 [00:46<00:30, 1.41s/it]
109
  62%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 34/55 [00:47<00:29, 1.41s/it]
110
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 35/55 [00:49<00:28, 1.41s/it]
111
 
112
+
113
  64%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 35/55 [00:49<00:28, 1.41s/it]
114
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 36/55 [00:50<00:26, 1.41s/it]
115
  67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 37/55 [00:52<00:25, 1.42s/it]
116
  69%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 38/55 [00:53<00:24, 1.42s/it]
117
  71%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 39/55 [00:55<00:22, 1.42s/it]
118
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 40/55 [00:56<00:21, 1.42s/it]
119
 
120
+
121
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 40/55 [00:56<00:21, 1.42s/it]
122
  75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 41/55 [00:57<00:19, 1.42s/it]
123
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 42/55 [00:59<00:18, 1.42s/it]
124
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 43/55 [01:00<00:17, 1.42s/it]
125
  80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 44/55 [01:02<00:15, 1.42s/it]
126
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 45/55 [01:03<00:14, 1.42s/it]
127
 
128
+
129
  82%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 45/55 [01:03<00:14, 1.42s/it]
130
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 46/55 [01:04<00:12, 1.41s/it]
131
  85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 47/55 [01:06<00:11, 1.41s/it]
132
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 48/55 [01:07<00:09, 1.41s/it]
133
  89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 49/55 [01:09<00:08, 1.41s/it]
134
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 50/55 [01:10<00:07, 1.41s/it]
135
 
136
+
137
  91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 50/55 [01:10<00:07, 1.41s/it]
138
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 51/55 [01:11<00:05, 1.42s/it]
139
  95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–| 52/55 [01:13<00:04, 1.42s/it]
140
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹| 53/55 [01:14<00:02, 1.42s/it]
141
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 54/55 [01:16<00:01, 1.42s/it]
142
 
143
+
144
+ Samples/second: 8.86
145
 
146
+
147
+
148
+
149
+ Time: 77.67
150
+ Samples/second: 8.88
151
+ GPU memory occupied: 44704 MB.
152
+ Traceback (most recent call last):
153
+ File "tune_gpt.py", line 223, in <module>
154
+ shutil.move(os.path.join(pwd_path, "output.log"), os.path.join(final_save_dir))
155
+ File "/usr/lib/python3.8/shutil.py", line 789, in move
156
+ raise Error("Destination path '%s' already exists" % real_dst)
157
+ shutil.Error: Destination path 'experiments/2022-12-19-ab8f3a39c84fea7f66bf71860384bbce5df5fb3523e7dabd22b35c3ecfefb154/output.log' already exists
158
+ [2022-12-19 10:47:33,785] [INFO] [launch.py:318:sigkill_handler] Killing subprocess 144520
159
+ [2022-12-19 10:47:33,786] [INFO] [launch.py:318:sigkill_handler] Killing subprocess 144521
160
+ [2022-12-19 10:47:33,793] [ERROR] [launch.py:324:sigkill_handler] ['/usr/bin/python3', '-u', 'tune_gpt.py', '--local_rank=1', '--deepspeed', 'deepspeed.json', '--limit=10', '--local_rank=-1'] exits with return code = 1