Upload experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff with huggingface_hub
Browse files- experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/configs.json +25 -0
- experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/final_checkpoint/config.json +54 -0
- experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/final_checkpoint/pytorch_model.bin +3 -0
- experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/logs/1666220848.4318457/events.out.tfevents.1666220848.recd68n2rbwktkbnt.4475.1 +3 -0
- experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/logs/events.out.tfevents.1666220848.recd68n2rbwktkbnt.4475.0 +3 -0
- experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/output.log +46 -0
experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/configs.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"output_dir": "./results",
|
3 |
+
"evaluation_strategy": "no",
|
4 |
+
"do_eval": false,
|
5 |
+
"eval_steps": 0,
|
6 |
+
"log_level": "error",
|
7 |
+
"logging_first_step": true,
|
8 |
+
"logging_steps": 5,
|
9 |
+
"logging_dir": "./logs",
|
10 |
+
"save_steps": 200,
|
11 |
+
"save_total_limit": 2,
|
12 |
+
"num_train_epochs": 5,
|
13 |
+
"per_device_train_batch_size": 12,
|
14 |
+
"optim": "adamw_torch",
|
15 |
+
"gradient_accumulation_steps": 4,
|
16 |
+
"dataloader_drop_last": true,
|
17 |
+
"warmup_steps": 100,
|
18 |
+
"weight_decay": 0.01,
|
19 |
+
"learning_rate": 5e-05,
|
20 |
+
"fp16": true,
|
21 |
+
"total_gpus": 1,
|
22 |
+
"v_cpus": 6,
|
23 |
+
"total_memory_in_gb": 23040.7890625,
|
24 |
+
"dataset_limit": 100
|
25 |
+
}
|
experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/final_checkpoint/config.json
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "EleutherAI/gpt-neo-125M",
|
3 |
+
"activation_function": "gelu_new",
|
4 |
+
"architectures": [
|
5 |
+
"GPTNeoForCausalLM"
|
6 |
+
],
|
7 |
+
"attention_dropout": 0,
|
8 |
+
"attention_layers": [
|
9 |
+
"global",
|
10 |
+
"local",
|
11 |
+
"global",
|
12 |
+
"local",
|
13 |
+
"global",
|
14 |
+
"local",
|
15 |
+
"global",
|
16 |
+
"local",
|
17 |
+
"global",
|
18 |
+
"local",
|
19 |
+
"global",
|
20 |
+
"local"
|
21 |
+
],
|
22 |
+
"attention_types": [
|
23 |
+
[
|
24 |
+
[
|
25 |
+
"global",
|
26 |
+
"local"
|
27 |
+
],
|
28 |
+
6
|
29 |
+
]
|
30 |
+
],
|
31 |
+
"bos_token_id": 50256,
|
32 |
+
"embed_dropout": 0,
|
33 |
+
"eos_token_id": 50256,
|
34 |
+
"gradient_checkpointing": false,
|
35 |
+
"hidden_size": 768,
|
36 |
+
"initializer_range": 0.02,
|
37 |
+
"intermediate_size": null,
|
38 |
+
"layer_norm_epsilon": 1e-05,
|
39 |
+
"max_position_embeddings": 2048,
|
40 |
+
"model_type": "gpt_neo",
|
41 |
+
"num_heads": 12,
|
42 |
+
"num_layers": 12,
|
43 |
+
"resid_dropout": 0,
|
44 |
+
"summary_activation": null,
|
45 |
+
"summary_first_dropout": 0.1,
|
46 |
+
"summary_proj_to_labels": true,
|
47 |
+
"summary_type": "cls_index",
|
48 |
+
"summary_use_proj": true,
|
49 |
+
"torch_dtype": "float32",
|
50 |
+
"transformers_version": "4.23.1",
|
51 |
+
"use_cache": true,
|
52 |
+
"vocab_size": 50259,
|
53 |
+
"window_size": 256
|
54 |
+
}
|
experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/final_checkpoint/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ede1dadf6abcfb32d6b34e431179bc5dab08c31b2637c3c4c3e2798f42b7859c
|
3 |
+
size 551191249
|
experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/logs/1666220848.4318457/events.out.tfevents.1666220848.recd68n2rbwktkbnt.4475.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec3fef782ad78bdeb10bbbcb409e2d7b969965e2f316c5571f8c16e5b188b3bc
|
3 |
+
size 5413
|
experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/logs/events.out.tfevents.1666220848.recd68n2rbwktkbnt.4475.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3a062804908bee000b28c420e1909b82c34a084b0730684d34eec9686a0dead
|
3 |
+
size 9341
|
experiments/2022-10-19-515cf3b9155fd406d5067b25b7a969d2fc7be8e238d63667d772142982e8e3ff/output.log
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
0 |
0%| | 0/155 [00:00<?, ?it/s]
|
1 |
1%| | 1/155 [00:03<09:53, 3.85s/it]
|
2 |
|
|
|
3 |
1%| | 1/155 [00:03<09:53, 3.85s/it]
|
4 |
1%|β | 2/155 [00:07<08:58, 3.52s/it]
|
5 |
2%|β | 3/155 [00:10<08:38, 3.41s/it]
|
6 |
3%|β | 4/155 [00:13<08:29, 3.37s/it]
|
7 |
3%|β | 5/155 [00:17<08:22, 3.35s/it]
|
8 |
|
|
|
9 |
3%|β | 5/155 [00:17<08:22, 3.35s/it]
|
10 |
4%|β | 6/155 [00:20<08:16, 3.33s/it]
|
11 |
5%|β | 7/155 [00:23<08:12, 3.33s/it]
|
12 |
5%|β | 8/155 [00:26<08:08, 3.32s/it]
|
13 |
6%|β | 9/155 [00:30<08:04, 3.32s/it]
|
14 |
6%|β | 10/155 [00:33<08:01, 3.32s/it]
|
15 |
|
|
|
16 |
6%|β | 10/155 [00:33<08:01, 3.32s/it]
|
17 |
7%|β | 11/155 [00:36<07:57, 3.32s/it]
|
18 |
8%|β | 12/155 [00:40<07:54, 3.32s/it]
|
19 |
8%|β | 13/155 [00:43<07:51, 3.32s/it]
|
20 |
9%|β | 14/155 [00:46<07:48, 3.32s/it]
|
21 |
10%|β | 15/155 [00:50<07:45, 3.32s/it]
|
22 |
|
|
|
23 |
10%|β | 15/155 [00:50<07:45, 3.32s/it]
|
24 |
10%|β | 16/155 [00:53<07:41, 3.32s/it]
|
25 |
11%|β | 17/155 [00:56<07:38, 3.32s/it]
|
26 |
12%|ββ | 18/155 [01:00<07:35, 3.32s/it]
|
27 |
12%|ββ | 19/155 [01:03<07:32, 3.32s/it]
|
28 |
13%|ββ | 20/155 [01:06<07:28, 3.32s/it]
|
29 |
|
|
|
30 |
13%|ββ | 20/155 [01:06<07:28, 3.32s/it]
|
31 |
14%|ββ | 21/155 [01:10<07:25, 3.32s/it]
|
32 |
14%|ββ | 22/155 [01:13<07:22, 3.33s/it]
|
33 |
15%|ββ | 23/155 [01:16<07:19, 3.33s/it]
|
34 |
15%|ββ | 24/155 [01:20<07:15, 3.33s/it]
|
35 |
16%|ββ | 25/155 [01:23<07:12, 3.33s/it]
|
36 |
|
|
|
37 |
16%|ββ | 25/155 [01:23<07:12, 3.33s/it]
|
38 |
17%|ββ | 26/155 [01:26<07:09, 3.33s/it]
|
39 |
17%|ββ | 27/155 [01:30<07:06, 3.33s/it]
|
40 |
18%|ββ | 28/155 [01:33<07:02, 3.33s/it]
|
41 |
19%|ββ | 29/155 [01:36<06:59, 3.33s/it]
|
42 |
19%|ββ | 30/155 [01:40<06:56, 3.33s/it]
|
43 |
|
|
|
44 |
19%|ββ | 30/155 [01:40<06:56, 3.33s/it]
|
45 |
20%|ββ | 31/155 [01:43<06:53, 3.33s/it]
|
46 |
21%|ββ | 32/155 [01:49<08:21, 4.08s/it]
|
47 |
21%|βββ | 33/155 [01:52<07:50, 3.85s/it]
|
48 |
22%|βββ | 34/155 [01:55<07:27, 3.70s/it]
|
49 |
23%|βββ | 35/155 [01:59<07:10, 3.59s/it]
|
50 |
|
|
|
51 |
23%|βββ | 35/155 [01:59<07:10, 3.59s/it]
|
52 |
23%|βββ | 36/155 [02:02<06:57, 3.51s/it]
|
53 |
24%|βββ | 37/155 [02:05<06:48, 3.46s/it]
|
54 |
25%|βββ | 38/155 [02:09<06:40, 3.42s/it]
|
55 |
25%|βββ | 39/155 [02:12<06:33, 3.39s/it]
|
56 |
26%|βββ | 40/155 [02:15<06:27, 3.37s/it]
|
57 |
|
|
|
58 |
26%|βββ | 40/155 [02:15<06:27, 3.37s/it]
|
59 |
26%|βββ | 41/155 [02:19<06:23, 3.36s/it]
|
60 |
27%|βββ | 42/155 [02:22<06:18, 3.35s/it]
|
61 |
28%|βββ | 43/155 [02:25<06:14, 3.34s/it]
|
62 |
28%|βββ | 44/155 [02:29<06:10, 3.34s/it]
|
63 |
29%|βββ | 45/155 [02:32<06:06, 3.34s/it]
|
64 |
|
|
|
65 |
29%|βββ | 45/155 [02:32<06:06, 3.34s/it]
|
66 |
30%|βββ | 46/155 [02:35<06:03, 3.33s/it]
|
67 |
30%|βββ | 47/155 [02:39<05:59, 3.33s/it]
|
68 |
31%|βββ | 48/155 [02:42<05:56, 3.33s/it]
|
69 |
32%|βββοΏ½οΏ½οΏ½ | 49/155 [02:45<05:53, 3.33s/it]
|
70 |
32%|ββββ | 50/155 [02:49<05:49, 3.33s/it]
|
71 |
|
|
|
72 |
32%|ββββ | 50/155 [02:49<05:49, 3.33s/it]
|
73 |
33%|ββββ | 51/155 [02:52<05:46, 3.33s/it]
|
74 |
34%|ββββ | 52/155 [02:55<05:43, 3.33s/it]
|
75 |
34%|ββββ | 53/155 [02:59<05:39, 3.33s/it]
|
76 |
35%|ββββ | 54/155 [03:02<05:36, 3.33s/it]
|
77 |
35%|ββββ | 55/155 [03:05<05:33, 3.33s/it]
|
78 |
|
|
|
79 |
35%|ββββ | 55/155 [03:05<05:33, 3.33s/it]
|
80 |
36%|ββββ | 56/155 [03:09<05:29, 3.33s/it]
|
81 |
37%|ββββ | 57/155 [03:12<05:26, 3.33s/it]
|
82 |
37%|ββββ | 58/155 [03:15<05:22, 3.33s/it]
|
83 |
38%|ββββ | 59/155 [03:19<05:19, 3.33s/it]
|
84 |
39%|ββββ | 60/155 [03:22<05:16, 3.33s/it]
|
85 |
|
|
|
86 |
39%|ββββ | 60/155 [03:22<05:16, 3.33s/it]
|
87 |
39%|ββββ | 61/155 [03:25<05:12, 3.33s/it]
|
88 |
40%|ββββ | 62/155 [03:29<05:09, 3.33s/it]
|
89 |
41%|ββββ | 63/155 [03:34<06:14, 4.07s/it]
|
90 |
41%|βββββ | 64/155 [03:38<05:50, 3.85s/it]
|
91 |
42%|βββββ | 65/155 [03:41<05:32, 3.69s/it]
|
92 |
|
|
|
93 |
42%|βββββ | 65/155 [03:41<05:32, 3.69s/it]
|
94 |
43%|βββββ | 66/155 [03:44<05:19, 3.59s/it]
|
95 |
43%|βββββ | 67/155 [03:48<05:08, 3.51s/it]
|
96 |
44%|βββββ | 68/155 [03:51<05:00, 3.45s/it]
|
97 |
45%|βββββ | 69/155 [03:54<04:53, 3.42s/it]
|
98 |
45%|βββββ | 70/155 [03:58<04:48, 3.39s/it]
|
99 |
|
|
|
100 |
45%|βββββ | 70/155 [03:58<04:48, 3.39s/it]
|
101 |
46%|βββββ | 71/155 [04:01<04:43, 3.37s/it]
|
102 |
46%|βββββ | 72/155 [04:04<04:38, 3.36s/it]
|
103 |
47%|βββββ | 73/155 [04:08<04:34, 3.35s/it]
|
104 |
48%|βββββ | 74/155 [04:11<04:30, 3.34s/it]
|
105 |
48%|βββββ | 75/155 [04:14<04:27, 3.34s/it]
|
106 |
|
|
|
107 |
48%|βββββ | 75/155 [04:14<04:27, 3.34s/it]
|
108 |
49%|βββββ | 76/155 [04:18<04:23, 3.34s/it]
|
109 |
50%|βββββ | 77/155 [04:21<04:20, 3.33s/it]
|
110 |
50%|βββββ | 78/155 [04:24<04:16, 3.33s/it]
|
111 |
51%|βββββ | 79/155 [04:28<04:13, 3.33s/it]
|
112 |
52%|ββββββ | 80/155 [04:31<04:09, 3.33s/it]
|
113 |
|
|
|
114 |
52%|ββββββ | 80/155 [04:31<04:09, 3.33s/it]
|
115 |
52%|ββββββ | 81/155 [04:34<04:06, 3.33s/it]
|
116 |
53%|ββββββ | 82/155 [04:38<04:03, 3.33s/it]
|
117 |
54%|ββββββ | 83/155 [04:41<03:59, 3.33s/it]
|
118 |
54%|ββββββ | 84/155 [04:44<03:56, 3.33s/it]
|
119 |
55%|ββββββ | 85/155 [04:48<03:52, 3.33s/it]
|
120 |
|
|
|
121 |
55%|ββββββ | 85/155 [04:48<03:52, 3.33s/it]
|
122 |
55%|ββββββ | 86/155 [04:51<03:49, 3.33s/it]
|
123 |
56%|ββββββ | 87/155 [04:54<03:46, 3.33s/it]
|
124 |
57%|ββββββ | 88/155 [04:58<03:43, 3.33s/it]
|
125 |
57%|ββββββ | 89/155 [05:01<03:39, 3.33s/it]
|
126 |
58%|ββββββ | 90/155 [05:04<03:36, 3.33s/it]
|
127 |
|
|
|
128 |
58%|ββββββ | 90/155 [05:04<03:36, 3.33s/it]
|
129 |
59%|ββββββ | 91/155 [05:08<03:33, 3.33s/it]
|
130 |
59%|ββββββ | 92/155 [05:11<03:29, 3.33s/it]
|
131 |
60%|ββββββ | 93/155 [05:14<03:26, 3.33s/it]
|
132 |
61%|ββββββ | 94/155 [05:20<04:08, 4.07s/it]
|
133 |
61%|βββββββ | 95/155 [05:23<03:51, 3.85s/it]
|
134 |
|
|
|
135 |
61%|βββββββ | 95/155 [05:23<03:51, 3.85s/it]
|
136 |
62%|βββββββ | 96/155 [05:27<03:37, 3.69s/it]
|
137 |
63%|βββββββ | 97/155 [05:30<03:27, 3.58s/it]
|
138 |
63%|βββββββ | 98/155 [05:33<03:19, 3.51s/it]
|
139 |
64%|βββββββ | 99/155 [05:37<03:13, 3.45s/it]
|
140 |
65%|βββββββ | 100/155 [05:40<03:07, 3.42s/it]
|
141 |
|
|
|
142 |
65%|βββββββ | 100/155 [05:40<03:07, 3.42s/it]
|
143 |
65%|βββββββ | 101/155 [05:43<03:03, 3.39s/it]
|
144 |
66%|βββββββ | 102/155 [05:47<02:58, 3.37s/it]
|
145 |
66%|βοΏ½οΏ½οΏ½βββββ | 103/155 [05:50<02:54, 3.36s/it]
|
146 |
67%|βββββββ | 104/155 [05:53<02:50, 3.35s/it]
|
147 |
68%|βββββββ | 105/155 [05:57<02:46, 3.34s/it]
|
148 |
|
|
|
149 |
68%|βββββββ | 105/155 [05:57<02:46, 3.34s/it]
|
150 |
68%|βββββββ | 106/155 [06:00<02:43, 3.34s/it]
|
151 |
69%|βββββββ | 107/155 [06:03<02:39, 3.33s/it]
|
152 |
70%|βββββββ | 108/155 [06:07<02:36, 3.33s/it]
|
153 |
70%|βββββββ | 109/155 [06:10<02:33, 3.33s/it]
|
154 |
71%|βββββββ | 110/155 [06:13<02:29, 3.33s/it]
|
155 |
|
|
|
156 |
71%|βββββββ | 110/155 [06:13<02:29, 3.33s/it]
|
157 |
72%|ββββββββ | 111/155 [06:17<02:26, 3.33s/it]
|
158 |
72%|ββββββββ | 112/155 [06:20<02:23, 3.33s/it]
|
159 |
73%|ββββββββ | 113/155 [06:23<02:19, 3.33s/it]
|
160 |
74%|ββββββββ | 114/155 [06:27<02:16, 3.33s/it]
|
161 |
74%|ββββββββ | 115/155 [06:30<02:13, 3.33s/it]
|
162 |
|
|
|
163 |
74%|ββββββββ | 115/155 [06:30<02:13, 3.33s/it]
|
164 |
75%|ββββββββ | 116/155 [06:33<02:09, 3.33s/it]
|
165 |
75%|ββββββββ | 117/155 [06:37<02:06, 3.33s/it]
|
166 |
76%|ββββββββ | 118/155 [06:40<02:03, 3.33s/it]
|
167 |
77%|ββββββββ | 119/155 [06:43<01:59, 3.33s/it]
|
168 |
77%|ββββββββ | 120/155 [06:47<01:56, 3.33s/it]
|
169 |
|
|
|
170 |
77%|ββββββββ | 120/155 [06:47<01:56, 3.33s/it]
|
171 |
78%|ββββββββ | 121/155 [06:50<01:53, 3.33s/it]
|
172 |
79%|ββββββββ | 122/155 [06:53<01:49, 3.33s/it]
|
173 |
79%|ββββββββ | 123/155 [06:57<01:46, 3.32s/it]
|
174 |
80%|ββββββββ | 124/155 [07:00<01:43, 3.32s/it]
|
175 |
81%|ββββββββ | 125/155 [07:06<02:02, 4.07s/it]
|
176 |
|
|
|
177 |
81%|ββββββββ | 125/155 [07:06<02:02, 4.07s/it]
|
178 |
81%|βββββββββ | 126/155 [07:09<01:51, 3.85s/it]
|
179 |
82%|βββββββββ | 127/155 [07:12<01:43, 3.69s/it]
|
180 |
83%|βββββββββ | 128/155 [07:16<01:36, 3.58s/it]
|
181 |
83%|βββββββββ | 129/155 [07:19<01:31, 3.50s/it]
|
182 |
84%|βββββββββ | 130/155 [07:22<01:26, 3.45s/it]
|
183 |
|
|
|
184 |
84%|βββββββββ | 130/155 [07:22<01:26, 3.45s/it]
|
185 |
85%|βββββββββ | 131/155 [07:26<01:21, 3.41s/it]
|
186 |
85%|βββββββββ | 132/155 [07:29<01:17, 3.39s/it]
|
187 |
86%|βββββββββ | 133/155 [07:32<01:14, 3.37s/it]
|
188 |
86%|βββββββββ | 134/155 [07:36<01:10, 3.36s/it]
|
189 |
87%|βββββββββ | 135/155 [07:39<01:06, 3.35s/it]
|
190 |
|
|
|
191 |
87%|βββββββββ | 135/155 [07:39<01:06, 3.35s/it]
|
192 |
88%|βββββββββ | 136/155 [07:42<01:03, 3.34s/it]
|
193 |
88%|βββββββββ | 137/155 [07:46<01:00, 3.33s/it]
|
194 |
89%|βββββββββ | 138/155 [07:49<00:56, 3.33s/it]
|
195 |
90%|βββββββββ | 139/155 [07:52<00:53, 3.33s/it]
|
196 |
90%|βββββββββ | 140/155 [07:56<00:49, 3.33s/it]
|
197 |
|
|
|
198 |
90%|βββββββββ | 140/155 [07:56<00:49, 3.33s/it]
|
199 |
91%|βββββββββ | 141/155 [07:59<00:46, 3.33s/it]
|
200 |
92%|ββββββββββ| 142/155 [08:02<00:43, 3.32s/it]
|
201 |
92%|ββββββββββ| 143/155 [08:06<00:39, 3.32s/it]
|
202 |
93%|ββββββββββ| 144/155 [08:09<00:36, 3.32s/it]
|
203 |
94%|ββββββββββ| 145/155 [08:12<00:33, 3.32s/it]
|
204 |
|
|
|
205 |
94%|ββββββββββ| 145/155 [08:12<00:33, 3.32s/it]
|
206 |
94%|ββββββββββ| 146/155 [08:16<00:29, 3.32s/it]
|
207 |
95%|ββββββββββ| 147/155 [08:19<00:26, 3.32s/it]
|
208 |
95%|ββββββββββ| 148/155 [08:22<00:23, 3.32s/it]
|
209 |
96%|ββββββββββ| 149/155 [08:26<00:19, 3.33s/it]
|
210 |
97%|ββββββββββ| 150/155 [08:29<00:16, 3.33s/it]
|
211 |
|
|
|
212 |
97%|ββββββββββ| 150/155 [08:29<00:16, 3.33s/it]
|
213 |
97%|ββββββββββ| 151/155 [08:32<00:13, 3.33s/it]
|
214 |
98%|ββββββββββ| 152/155 [08:36<00:09, 3.33s/it]
|
215 |
99%|ββββββββββ| 153/155 [08:39<00:06, 3.33s/it]
|
216 |
99%|ββββββββββ| 154/155 [08:42<00:03, 3.33s/it]
|
217 |
|
|
|
218 |
|
|
|
|
|
|
|
|
|
|
1 |
+
nohup: ignoring input
|
2 |
+
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|
3 |
+
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|
4 |
+
No config specified, defaulting to: apps/all
|
5 |
+
Found cached dataset apps (/home/user/.cache/huggingface/datasets/codeparrot___apps/all/0.0.0/04ac807715d07d6e5cc580f59cdc8213cd7dc4529d0bb819cca72c9f8e8c1aa5)
|
6 |
+
Max length: 2048
|
7 |
+
PyTorch: setting up devices
|
8 |
+
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
|
9 |
+
GPU memory occupied: 2667 MB.
|
10 |
+
|
11 |
0%| | 0/155 [00:00<?, ?it/s]
|
12 |
1%| | 1/155 [00:03<09:53, 3.85s/it]
|
13 |
|
14 |
+
|
15 |
1%| | 1/155 [00:03<09:53, 3.85s/it]
|
16 |
1%|β | 2/155 [00:07<08:58, 3.52s/it]
|
17 |
2%|β | 3/155 [00:10<08:38, 3.41s/it]
|
18 |
3%|β | 4/155 [00:13<08:29, 3.37s/it]
|
19 |
3%|β | 5/155 [00:17<08:22, 3.35s/it]
|
20 |
|
21 |
+
|
22 |
3%|β | 5/155 [00:17<08:22, 3.35s/it]
|
23 |
4%|β | 6/155 [00:20<08:16, 3.33s/it]
|
24 |
5%|β | 7/155 [00:23<08:12, 3.33s/it]
|
25 |
5%|β | 8/155 [00:26<08:08, 3.32s/it]
|
26 |
6%|β | 9/155 [00:30<08:04, 3.32s/it]
|
27 |
6%|β | 10/155 [00:33<08:01, 3.32s/it]
|
28 |
|
29 |
+
|
30 |
6%|β | 10/155 [00:33<08:01, 3.32s/it]
|
31 |
7%|β | 11/155 [00:36<07:57, 3.32s/it]
|
32 |
8%|β | 12/155 [00:40<07:54, 3.32s/it]
|
33 |
8%|β | 13/155 [00:43<07:51, 3.32s/it]
|
34 |
9%|β | 14/155 [00:46<07:48, 3.32s/it]
|
35 |
10%|β | 15/155 [00:50<07:45, 3.32s/it]
|
36 |
|
37 |
+
|
38 |
10%|β | 15/155 [00:50<07:45, 3.32s/it]
|
39 |
10%|β | 16/155 [00:53<07:41, 3.32s/it]
|
40 |
11%|β | 17/155 [00:56<07:38, 3.32s/it]
|
41 |
12%|ββ | 18/155 [01:00<07:35, 3.32s/it]
|
42 |
12%|ββ | 19/155 [01:03<07:32, 3.32s/it]
|
43 |
13%|ββ | 20/155 [01:06<07:28, 3.32s/it]
|
44 |
|
45 |
+
|
46 |
13%|ββ | 20/155 [01:06<07:28, 3.32s/it]
|
47 |
14%|ββ | 21/155 [01:10<07:25, 3.32s/it]
|
48 |
14%|ββ | 22/155 [01:13<07:22, 3.33s/it]
|
49 |
15%|ββ | 23/155 [01:16<07:19, 3.33s/it]
|
50 |
15%|ββ | 24/155 [01:20<07:15, 3.33s/it]
|
51 |
16%|ββ | 25/155 [01:23<07:12, 3.33s/it]
|
52 |
|
53 |
+
|
54 |
16%|ββ | 25/155 [01:23<07:12, 3.33s/it]
|
55 |
17%|ββ | 26/155 [01:26<07:09, 3.33s/it]
|
56 |
17%|ββ | 27/155 [01:30<07:06, 3.33s/it]
|
57 |
18%|ββ | 28/155 [01:33<07:02, 3.33s/it]
|
58 |
19%|ββ | 29/155 [01:36<06:59, 3.33s/it]
|
59 |
19%|ββ | 30/155 [01:40<06:56, 3.33s/it]
|
60 |
|
61 |
+
|
62 |
19%|ββ | 30/155 [01:40<06:56, 3.33s/it]
|
63 |
20%|ββ | 31/155 [01:43<06:53, 3.33s/it]
|
64 |
21%|ββ | 32/155 [01:49<08:21, 4.08s/it]
|
65 |
21%|βββ | 33/155 [01:52<07:50, 3.85s/it]
|
66 |
22%|βββ | 34/155 [01:55<07:27, 3.70s/it]
|
67 |
23%|βββ | 35/155 [01:59<07:10, 3.59s/it]
|
68 |
|
69 |
+
|
70 |
23%|βββ | 35/155 [01:59<07:10, 3.59s/it]
|
71 |
23%|βββ | 36/155 [02:02<06:57, 3.51s/it]
|
72 |
24%|βββ | 37/155 [02:05<06:48, 3.46s/it]
|
73 |
25%|βββ | 38/155 [02:09<06:40, 3.42s/it]
|
74 |
25%|βββ | 39/155 [02:12<06:33, 3.39s/it]
|
75 |
26%|βββ | 40/155 [02:15<06:27, 3.37s/it]
|
76 |
|
77 |
+
|
78 |
26%|βββ | 40/155 [02:15<06:27, 3.37s/it]
|
79 |
26%|βββ | 41/155 [02:19<06:23, 3.36s/it]
|
80 |
27%|βββ | 42/155 [02:22<06:18, 3.35s/it]
|
81 |
28%|βββ | 43/155 [02:25<06:14, 3.34s/it]
|
82 |
28%|βββ | 44/155 [02:29<06:10, 3.34s/it]
|
83 |
29%|βββ | 45/155 [02:32<06:06, 3.34s/it]
|
84 |
|
85 |
+
|
86 |
29%|βββ | 45/155 [02:32<06:06, 3.34s/it]
|
87 |
30%|βββ | 46/155 [02:35<06:03, 3.33s/it]
|
88 |
30%|βββ | 47/155 [02:39<05:59, 3.33s/it]
|
89 |
31%|βββ | 48/155 [02:42<05:56, 3.33s/it]
|
90 |
32%|βββοΏ½οΏ½οΏ½ | 49/155 [02:45<05:53, 3.33s/it]
|
91 |
32%|ββββ | 50/155 [02:49<05:49, 3.33s/it]
|
92 |
|
93 |
+
|
94 |
32%|ββββ | 50/155 [02:49<05:49, 3.33s/it]
|
95 |
33%|ββββ | 51/155 [02:52<05:46, 3.33s/it]
|
96 |
34%|ββββ | 52/155 [02:55<05:43, 3.33s/it]
|
97 |
34%|ββββ | 53/155 [02:59<05:39, 3.33s/it]
|
98 |
35%|ββββ | 54/155 [03:02<05:36, 3.33s/it]
|
99 |
35%|ββββ | 55/155 [03:05<05:33, 3.33s/it]
|
100 |
|
101 |
+
|
102 |
35%|ββββ | 55/155 [03:05<05:33, 3.33s/it]
|
103 |
36%|ββββ | 56/155 [03:09<05:29, 3.33s/it]
|
104 |
37%|ββββ | 57/155 [03:12<05:26, 3.33s/it]
|
105 |
37%|ββββ | 58/155 [03:15<05:22, 3.33s/it]
|
106 |
38%|ββββ | 59/155 [03:19<05:19, 3.33s/it]
|
107 |
39%|ββββ | 60/155 [03:22<05:16, 3.33s/it]
|
108 |
|
109 |
+
|
110 |
39%|ββββ | 60/155 [03:22<05:16, 3.33s/it]
|
111 |
39%|ββββ | 61/155 [03:25<05:12, 3.33s/it]
|
112 |
40%|ββββ | 62/155 [03:29<05:09, 3.33s/it]
|
113 |
41%|ββββ | 63/155 [03:34<06:14, 4.07s/it]
|
114 |
41%|βββββ | 64/155 [03:38<05:50, 3.85s/it]
|
115 |
42%|βββββ | 65/155 [03:41<05:32, 3.69s/it]
|
116 |
|
117 |
+
|
118 |
42%|βββββ | 65/155 [03:41<05:32, 3.69s/it]
|
119 |
43%|βββββ | 66/155 [03:44<05:19, 3.59s/it]
|
120 |
43%|βββββ | 67/155 [03:48<05:08, 3.51s/it]
|
121 |
44%|βββββ | 68/155 [03:51<05:00, 3.45s/it]
|
122 |
45%|βββββ | 69/155 [03:54<04:53, 3.42s/it]
|
123 |
45%|βββββ | 70/155 [03:58<04:48, 3.39s/it]
|
124 |
|
125 |
+
|
126 |
45%|βββββ | 70/155 [03:58<04:48, 3.39s/it]
|
127 |
46%|βββββ | 71/155 [04:01<04:43, 3.37s/it]
|
128 |
46%|βββββ | 72/155 [04:04<04:38, 3.36s/it]
|
129 |
47%|βββββ | 73/155 [04:08<04:34, 3.35s/it]
|
130 |
48%|βββββ | 74/155 [04:11<04:30, 3.34s/it]
|
131 |
48%|βββββ | 75/155 [04:14<04:27, 3.34s/it]
|
132 |
|
133 |
+
|
134 |
48%|βββββ | 75/155 [04:14<04:27, 3.34s/it]
|
135 |
49%|βββββ | 76/155 [04:18<04:23, 3.34s/it]
|
136 |
50%|βββββ | 77/155 [04:21<04:20, 3.33s/it]
|
137 |
50%|βββββ | 78/155 [04:24<04:16, 3.33s/it]
|
138 |
51%|βββββ | 79/155 [04:28<04:13, 3.33s/it]
|
139 |
52%|ββββββ | 80/155 [04:31<04:09, 3.33s/it]
|
140 |
|
141 |
+
|
142 |
52%|ββββββ | 80/155 [04:31<04:09, 3.33s/it]
|
143 |
52%|ββββββ | 81/155 [04:34<04:06, 3.33s/it]
|
144 |
53%|ββββββ | 82/155 [04:38<04:03, 3.33s/it]
|
145 |
54%|ββββββ | 83/155 [04:41<03:59, 3.33s/it]
|
146 |
54%|ββββββ | 84/155 [04:44<03:56, 3.33s/it]
|
147 |
55%|ββββββ | 85/155 [04:48<03:52, 3.33s/it]
|
148 |
|
149 |
+
|
150 |
55%|ββββββ | 85/155 [04:48<03:52, 3.33s/it]
|
151 |
55%|ββββββ | 86/155 [04:51<03:49, 3.33s/it]
|
152 |
56%|ββββββ | 87/155 [04:54<03:46, 3.33s/it]
|
153 |
57%|ββββββ | 88/155 [04:58<03:43, 3.33s/it]
|
154 |
57%|ββββββ | 89/155 [05:01<03:39, 3.33s/it]
|
155 |
58%|ββββββ | 90/155 [05:04<03:36, 3.33s/it]
|
156 |
|
157 |
+
|
158 |
58%|ββββββ | 90/155 [05:04<03:36, 3.33s/it]
|
159 |
59%|ββββββ | 91/155 [05:08<03:33, 3.33s/it]
|
160 |
59%|ββββββ | 92/155 [05:11<03:29, 3.33s/it]
|
161 |
60%|ββββββ | 93/155 [05:14<03:26, 3.33s/it]
|
162 |
61%|ββββββ | 94/155 [05:20<04:08, 4.07s/it]
|
163 |
61%|βββββββ | 95/155 [05:23<03:51, 3.85s/it]
|
164 |
|
165 |
+
|
166 |
61%|βββββββ | 95/155 [05:23<03:51, 3.85s/it]
|
167 |
62%|βββββββ | 96/155 [05:27<03:37, 3.69s/it]
|
168 |
63%|βββββββ | 97/155 [05:30<03:27, 3.58s/it]
|
169 |
63%|βββββββ | 98/155 [05:33<03:19, 3.51s/it]
|
170 |
64%|βββββββ | 99/155 [05:37<03:13, 3.45s/it]
|
171 |
65%|βββββββ | 100/155 [05:40<03:07, 3.42s/it]
|
172 |
|
173 |
+
|
174 |
65%|βββββββ | 100/155 [05:40<03:07, 3.42s/it]
|
175 |
65%|βββββββ | 101/155 [05:43<03:03, 3.39s/it]
|
176 |
66%|βββββββ | 102/155 [05:47<02:58, 3.37s/it]
|
177 |
66%|βοΏ½οΏ½οΏ½βββββ | 103/155 [05:50<02:54, 3.36s/it]
|
178 |
67%|βββββββ | 104/155 [05:53<02:50, 3.35s/it]
|
179 |
68%|βββββββ | 105/155 [05:57<02:46, 3.34s/it]
|
180 |
|
181 |
+
|
182 |
68%|βββββββ | 105/155 [05:57<02:46, 3.34s/it]
|
183 |
68%|βββββββ | 106/155 [06:00<02:43, 3.34s/it]
|
184 |
69%|βββββββ | 107/155 [06:03<02:39, 3.33s/it]
|
185 |
70%|βββββββ | 108/155 [06:07<02:36, 3.33s/it]
|
186 |
70%|βββββββ | 109/155 [06:10<02:33, 3.33s/it]
|
187 |
71%|βββββββ | 110/155 [06:13<02:29, 3.33s/it]
|
188 |
|
189 |
+
|
190 |
71%|βββββββ | 110/155 [06:13<02:29, 3.33s/it]
|
191 |
72%|ββββββββ | 111/155 [06:17<02:26, 3.33s/it]
|
192 |
72%|ββββββββ | 112/155 [06:20<02:23, 3.33s/it]
|
193 |
73%|ββββββββ | 113/155 [06:23<02:19, 3.33s/it]
|
194 |
74%|ββββββββ | 114/155 [06:27<02:16, 3.33s/it]
|
195 |
74%|ββββββββ | 115/155 [06:30<02:13, 3.33s/it]
|
196 |
|
197 |
+
|
198 |
74%|ββββββββ | 115/155 [06:30<02:13, 3.33s/it]
|
199 |
75%|ββββββββ | 116/155 [06:33<02:09, 3.33s/it]
|
200 |
75%|ββββββββ | 117/155 [06:37<02:06, 3.33s/it]
|
201 |
76%|ββββββββ | 118/155 [06:40<02:03, 3.33s/it]
|
202 |
77%|ββββββββ | 119/155 [06:43<01:59, 3.33s/it]
|
203 |
77%|ββββββββ | 120/155 [06:47<01:56, 3.33s/it]
|
204 |
|
205 |
+
|
206 |
77%|ββββββββ | 120/155 [06:47<01:56, 3.33s/it]
|
207 |
78%|ββββββββ | 121/155 [06:50<01:53, 3.33s/it]
|
208 |
79%|ββββββββ | 122/155 [06:53<01:49, 3.33s/it]
|
209 |
79%|ββββββββ | 123/155 [06:57<01:46, 3.32s/it]
|
210 |
80%|ββββββββ | 124/155 [07:00<01:43, 3.32s/it]
|
211 |
81%|ββββββββ | 125/155 [07:06<02:02, 4.07s/it]
|
212 |
|
213 |
+
|
214 |
81%|ββββββββ | 125/155 [07:06<02:02, 4.07s/it]
|
215 |
81%|βββββββββ | 126/155 [07:09<01:51, 3.85s/it]
|
216 |
82%|βββββββββ | 127/155 [07:12<01:43, 3.69s/it]
|
217 |
83%|βββββββββ | 128/155 [07:16<01:36, 3.58s/it]
|
218 |
83%|βββββββββ | 129/155 [07:19<01:31, 3.50s/it]
|
219 |
84%|βββββββββ | 130/155 [07:22<01:26, 3.45s/it]
|
220 |
|
221 |
+
|
222 |
84%|βββββββββ | 130/155 [07:22<01:26, 3.45s/it]
|
223 |
85%|βββββββββ | 131/155 [07:26<01:21, 3.41s/it]
|
224 |
85%|βββββββββ | 132/155 [07:29<01:17, 3.39s/it]
|
225 |
86%|βββββββββ | 133/155 [07:32<01:14, 3.37s/it]
|
226 |
86%|βββββββββ | 134/155 [07:36<01:10, 3.36s/it]
|
227 |
87%|βββββββββ | 135/155 [07:39<01:06, 3.35s/it]
|
228 |
|
229 |
+
|
230 |
87%|βββββββββ | 135/155 [07:39<01:06, 3.35s/it]
|
231 |
88%|βββββββββ | 136/155 [07:42<01:03, 3.34s/it]
|
232 |
88%|βββββββββ | 137/155 [07:46<01:00, 3.33s/it]
|
233 |
89%|βββββββββ | 138/155 [07:49<00:56, 3.33s/it]
|
234 |
90%|βββββββββ | 139/155 [07:52<00:53, 3.33s/it]
|
235 |
90%|βββββββββ | 140/155 [07:56<00:49, 3.33s/it]
|
236 |
|
237 |
+
|
238 |
90%|βββββββββ | 140/155 [07:56<00:49, 3.33s/it]
|
239 |
91%|βββββββββ | 141/155 [07:59<00:46, 3.33s/it]
|
240 |
92%|ββββββββββ| 142/155 [08:02<00:43, 3.32s/it]
|
241 |
92%|ββββββββββ| 143/155 [08:06<00:39, 3.32s/it]
|
242 |
93%|ββββββββββ| 144/155 [08:09<00:36, 3.32s/it]
|
243 |
94%|ββββββββββ| 145/155 [08:12<00:33, 3.32s/it]
|
244 |
|
245 |
+
|
246 |
94%|ββββββββββ| 145/155 [08:12<00:33, 3.32s/it]
|
247 |
94%|ββββββββββ| 146/155 [08:16<00:29, 3.32s/it]
|
248 |
95%|ββββββββββ| 147/155 [08:19<00:26, 3.32s/it]
|
249 |
95%|ββββββββββ| 148/155 [08:22<00:23, 3.32s/it]
|
250 |
96%|ββββββββββ| 149/155 [08:26<00:19, 3.33s/it]
|
251 |
97%|ββββββββββ| 150/155 [08:29<00:16, 3.33s/it]
|
252 |
|
253 |
+
|
254 |
97%|ββββββββββ| 150/155 [08:29<00:16, 3.33s/it]
|
255 |
97%|ββββββββββ| 151/155 [08:32<00:13, 3.33s/it]
|
256 |
98%|ββββββββββ| 152/155 [08:36<00:09, 3.33s/it]
|
257 |
99%|ββββββββββ| 153/155 [08:39<00:06, 3.33s/it]
|
258 |
99%|ββββββββββ| 154/155 [08:42<00:03, 3.33s/it]
|
259 |
|
260 |
+
|
261 |
|
262 |
+
|
263 |
+
Time: 526.03
|
264 |
+
Samples/second: 14.54
|
265 |
+
GPU memory occupied: 81547 MB.
|