dat
committed on
Commit
•
6571862
1
Parent(s):
87e02e7
Saving weights and logs of step 30000
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- checkpoint_30000 +3 -0
- events.out.tfevents.1626315358.t1v-n-f5c06ea1-w-0.643445.3.v2 +3 -0
- events.out.tfevents.1626316431.t1v-n-f5c06ea1-w-0.646155.3.v2 +3 -0
- events.out.tfevents.1626317295.t1v-n-f5c06ea1-w-0.648648.3.v2 +3 -0
- events.out.tfevents.1626318014.t1v-n-f5c06ea1-w-0.651126.3.v2 +3 -0
- events.out.tfevents.1626318666.t1v-n-f5c06ea1-w-0.655476.3.v2 +3 -0
- flax_model.msgpack +1 -1
- run.sh +1 -1
- run_mlm_flax_no_accum.py +1 -1
- wandb/debug-internal.log +1 -1
- wandb/debug.log +1 -1
- wandb/latest-run +1 -1
- wandb/run-20210715_020018-3i0mvo08/files/config.yaml +3 -0
- wandb/run-20210715_020018-3i0mvo08/files/output.log +28 -0
- wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log +126 -0
- wandb/run-20210715_020018-3i0mvo08/logs/debug.log +94 -0
- wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb +0 -0
- wandb/run-20210715_021559-38yj0n5v/files/config.yaml +304 -0
- wandb/run-20210715_021559-38yj0n5v/files/output.log +37 -0
- wandb/run-20210715_021559-38yj0n5v/files/requirements.txt +94 -0
- wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json +44 -0
- wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json +1 -0
- wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log +298 -0
- wandb/run-20210715_021559-38yj0n5v/logs/debug.log +119 -0
- wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb +0 -0
- wandb/run-20210715_023352-28io0kfl/files/config.yaml +304 -0
- wandb/run-20210715_023352-28io0kfl/files/output.log +37 -0
- wandb/run-20210715_023352-28io0kfl/files/requirements.txt +94 -0
- wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json +44 -0
- wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json +1 -0
- wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log +268 -0
- wandb/run-20210715_023352-28io0kfl/logs/debug.log +119 -0
- wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb +0 -0
- wandb/run-20210715_024816-39ztwpif/files/config.yaml +304 -0
- wandb/run-20210715_024816-39ztwpif/files/output.log +37 -0
- wandb/run-20210715_024816-39ztwpif/files/requirements.txt +94 -0
- wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json +44 -0
- wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json +1 -0
- wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log +240 -0
- wandb/run-20210715_024816-39ztwpif/logs/debug.log +119 -0
- wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb +0 -0
- wandb/run-20210715_030015-30wihv4o/files/config.yaml +304 -0
- wandb/run-20210715_030015-30wihv4o/files/output.log +37 -0
- wandb/run-20210715_030015-30wihv4o/files/requirements.txt +94 -0
- wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json +44 -0
- wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json +1 -0
- wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log +232 -0
- wandb/run-20210715_030015-30wihv4o/logs/debug.log +119 -0
- wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb +0 -0
- wandb/run-20210715_031107-69jkygz3/files/config.yaml +301 -0
checkpoint_30000
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59518736214a20e8125f1484fe8db260c9384560796a22aa38130472f209af5f
|
3 |
+
size 1530270447
|
events.out.tfevents.1626315358.t1v-n-f5c06ea1-w-0.643445.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbbdcfdd6e9c4204719d1205d9986aefaa632a8322127f79c7c2db6721350035
|
3 |
+
size 40
|
events.out.tfevents.1626316431.t1v-n-f5c06ea1-w-0.646155.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d42ca9bdf3e3d3484649f49a885a13ee3f0a5215c95ce545d2555e478ec6d2c3
|
3 |
+
size 40
|
events.out.tfevents.1626317295.t1v-n-f5c06ea1-w-0.648648.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4b9c6db5c2a2231727f871c58176b8c65820459b68583d2c8f45c07f2298c60
|
3 |
+
size 40
|
events.out.tfevents.1626318014.t1v-n-f5c06ea1-w-0.651126.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e65b24a209eecf55c1ee53b6bdce4204f068eff07c0ed31b60a3fc7d1ad7de80
|
3 |
+
size 40
|
events.out.tfevents.1626318666.t1v-n-f5c06ea1-w-0.655476.3.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69b32ab8a1ed5787f81840edb5bf7961c38526a7e7a3785c8559a727406f16a2
|
3 |
+
size 4478974
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510090043
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:888817743e9128cfd6e093b2327a40d34a3acb8bcc7b90b00adfae9b27af28ec
|
3 |
size 510090043
|
run.sh
CHANGED
@@ -18,7 +18,7 @@ python ./run_mlm_flax_no_accum.py \
|
|
18 |
--eval_steps="20000" \
|
19 |
--num_train_epochs="5" \
|
20 |
--preprocessing_num_workers="96" \
|
21 |
-
--save_steps="
|
22 |
--learning_rate="3e-5" \
|
23 |
--per_device_train_batch_size="1" \
|
24 |
--per_device_eval_batch_size="1" \
|
|
|
18 |
--eval_steps="20000" \
|
19 |
--num_train_epochs="5" \
|
20 |
--preprocessing_num_workers="96" \
|
21 |
+
--save_steps="30000" \
|
22 |
--learning_rate="3e-5" \
|
23 |
--per_device_train_batch_size="1" \
|
24 |
--per_device_eval_batch_size="1" \
|
run_mlm_flax_no_accum.py
CHANGED
@@ -421,7 +421,7 @@ if __name__ == "__main__":
|
|
421 |
tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
|
422 |
logger.info("Setting max validation examples to ")
|
423 |
print(f"Number of validation examples {data_args.max_eval_samples}")
|
424 |
-
|
425 |
if data_args.max_eval_samples is not None:
|
426 |
tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
|
427 |
else:
|
|
|
421 |
tokenized_datasets = DatasetDict.load_from_disk("/data/tokenized_data")
|
422 |
logger.info("Setting max validation examples to ")
|
423 |
print(f"Number of validation examples {data_args.max_eval_samples}")
|
424 |
+
tokenized_datasets["train"]= tokenized_datasets["train"].select(range(int(0.35*len(tokenized_datasets["train"]))))
|
425 |
if data_args.max_eval_samples is not None:
|
426 |
tokenized_datasets["validation"] = tokenized_datasets["validation"].select(range(data_args.max_eval_samples))
|
427 |
else:
|
wandb/debug-internal.log
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210715_031107-69jkygz3/logs/debug-internal.log
|
wandb/debug.log
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210715_031107-69jkygz3/logs/debug.log
|
wandb/latest-run
CHANGED
@@ -1 +1 @@
|
|
1 |
-
run-
|
|
|
1 |
+
run-20210715_031107-69jkygz3
|
wandb/run-20210715_020018-3i0mvo08/files/config.yaml
CHANGED
@@ -13,6 +13,9 @@ _wandb:
|
|
13 |
1:
|
14 |
- 3
|
15 |
- 11
|
|
|
|
|
|
|
16 |
4: 3.8.10
|
17 |
5: 0.10.33
|
18 |
6: 4.9.0.dev0
|
|
|
13 |
1:
|
14 |
- 3
|
15 |
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
4: 3.8.10
|
20 |
5: 0.10.33
|
21 |
6: 4.9.0.dev0
|
wandb/run-20210715_020018-3i0mvo08/files/output.log
CHANGED
@@ -2,3 +2,31 @@
|
|
2 |
warnings.warn(
|
3 |
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
4 |
warnings.warn(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
warnings.warn(
|
3 |
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
4 |
warnings.warn(
|
5 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]2021-07-15 02:14:44.903616: E external/org_tensorflow/tensorflow/compiler/xla/pjrt/pjrt_stream_executor_client.cc:2036] Execution of replica 0 failed: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0)
|
6 |
+
Epoch ... (1/5): 0%| | 0/5 [14:10<?, ?it/s]
|
7 |
+
Traceback (most recent call last):
|
8 |
+
File "./run_mlm_flax_no_accum.py", line 690, in <module>
|
9 |
+
train_batch_idx = generate_batch_splits(train_samples_idx, train_batch_size)
|
10 |
+
File "./run_mlm_flax_no_accum.py", line 255, in generate_batch_splits
|
11 |
+
batch_idx = np.split(samples_idx, sections_split)
|
12 |
+
File "<__array_function__ internals>", line 5, in split
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 874, in split
|
14 |
+
return array_split(ary, indices_or_sections, axis)
|
15 |
+
File "<__array_function__ internals>", line 5, in array_split
|
16 |
+
File "/home/dat/pino/lib/python3.8/site-packages/numpy/lib/shape_base.py", line 790, in array_split
|
17 |
+
sub_arys.append(_nx.swapaxes(sary[st:end], axis, 0))
|
18 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5009, in _rewriting_take
|
19 |
+
return _gather(arr, treedef, static_idx, dynamic_idx, indices_are_sorted,
|
20 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/numpy/lax_numpy.py", line 5028, in _gather
|
21 |
+
y = lax.gather(
|
22 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/lax/lax.py", line 984, in gather
|
23 |
+
return gather_p.bind(
|
24 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 264, in bind
|
25 |
+
out = top_trace.process_primitive(self, tracers, params)
|
26 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 603, in process_primitive
|
27 |
+
return primitive.impl(*tracers, **params)
|
28 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 249, in apply_primitive
|
29 |
+
return compiled_fun(*args)
|
30 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/xla.py", line 365, in _execute_compiled_primitive
|
31 |
+
out_bufs = compiled.execute(input_bufs)
|
32 |
+
RuntimeError: Resource exhausted: Attempting to allocate 17.0K. That was not possible. There are 48.0K free. Due to fragmentation, the largest contiguous region of free memory is 16.0K.; (0x0x0_HBM0)
|
wandb/run-20210715_020018-3i0mvo08/logs/debug-internal.log
CHANGED
@@ -154,3 +154,129 @@
|
|
154 |
2021-07-15 02:12:27,302 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
155 |
2021-07-15 02:12:42,431 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
156 |
2021-07-15 02:12:42,432 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
2021-07-15 02:12:27,302 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
155 |
2021-07-15 02:12:42,431 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
156 |
2021-07-15 02:12:42,432 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
157 |
+
2021-07-15 02:12:50,705 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
|
158 |
+
2021-07-15 02:12:57,560 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
159 |
+
2021-07-15 02:12:57,561 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
160 |
+
2021-07-15 02:13:12,692 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
161 |
+
2021-07-15 02:13:12,692 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
162 |
+
2021-07-15 02:13:20,785 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
|
163 |
+
2021-07-15 02:13:27,826 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
164 |
+
2021-07-15 02:13:27,826 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
165 |
+
2021-07-15 02:13:42,962 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
166 |
+
2021-07-15 02:13:42,963 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
167 |
+
2021-07-15 02:13:50,860 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
|
168 |
+
2021-07-15 02:13:58,097 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
169 |
+
2021-07-15 02:13:58,097 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
170 |
+
2021-07-15 02:14:13,229 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
171 |
+
2021-07-15 02:14:13,229 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
172 |
+
2021-07-15 02:14:20,935 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
|
173 |
+
2021-07-15 02:14:28,363 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
174 |
+
2021-07-15 02:14:28,363 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
175 |
+
2021-07-15 02:14:43,496 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: stop_status
|
176 |
+
2021-07-15 02:14:43,496 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: stop_status
|
177 |
+
2021-07-15 02:14:46,031 INFO Thread-8 :641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log
|
178 |
+
2021-07-15 02:14:47,302 DEBUG SenderThread:641950 [sender.py:send():179] send: telemetry
|
179 |
+
2021-07-15 02:14:47,303 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
180 |
+
2021-07-15 02:14:47,303 DEBUG SenderThread:641950 [sender.py:send():179] send: exit
|
181 |
+
2021-07-15 02:14:47,303 INFO SenderThread:641950 [sender.py:send_exit():287] handling exit code: 1
|
182 |
+
2021-07-15 02:14:47,304 INFO SenderThread:641950 [sender.py:send_exit():295] send defer
|
183 |
+
2021-07-15 02:14:47,305 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
184 |
+
2021-07-15 02:14:47,305 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
185 |
+
2021-07-15 02:14:47,306 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 0
|
186 |
+
2021-07-15 02:14:47,306 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
187 |
+
2021-07-15 02:14:47,306 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 0
|
188 |
+
2021-07-15 02:14:47,306 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 1
|
189 |
+
2021-07-15 02:14:47,306 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
190 |
+
2021-07-15 02:14:47,306 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 1
|
191 |
+
2021-07-15 02:14:47,401 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
192 |
+
2021-07-15 02:14:47,401 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 1
|
193 |
+
2021-07-15 02:14:47,401 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 2
|
194 |
+
2021-07-15 02:14:47,401 DEBUG SenderThread:641950 [sender.py:send():179] send: stats
|
195 |
+
2021-07-15 02:14:47,402 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
196 |
+
2021-07-15 02:14:47,402 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 2
|
197 |
+
2021-07-15 02:14:47,402 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
198 |
+
2021-07-15 02:14:47,402 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 2
|
199 |
+
2021-07-15 02:14:47,402 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 3
|
200 |
+
2021-07-15 02:14:47,403 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
201 |
+
2021-07-15 02:14:47,403 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 3
|
202 |
+
2021-07-15 02:14:47,403 DEBUG SenderThread:641950 [sender.py:send():179] send: summary
|
203 |
+
2021-07-15 02:14:47,403 INFO SenderThread:641950 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
204 |
+
2021-07-15 02:14:47,404 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
205 |
+
2021-07-15 02:14:47,404 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 3
|
206 |
+
2021-07-15 02:14:47,404 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 4
|
207 |
+
2021-07-15 02:14:47,404 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
208 |
+
2021-07-15 02:14:47,404 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 4
|
209 |
+
2021-07-15 02:14:47,404 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
210 |
+
2021-07-15 02:14:47,404 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 4
|
211 |
+
2021-07-15 02:14:47,409 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
212 |
+
2021-07-15 02:14:47,585 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 5
|
213 |
+
2021-07-15 02:14:47,586 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
214 |
+
2021-07-15 02:14:47,586 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
215 |
+
2021-07-15 02:14:47,586 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 5
|
216 |
+
2021-07-15 02:14:47,586 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
217 |
+
2021-07-15 02:14:47,586 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 5
|
218 |
+
2021-07-15 02:14:47,586 INFO SenderThread:641950 [dir_watcher.py:finish():282] shutting down directory watcher
|
219 |
+
2021-07-15 02:14:47,688 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
220 |
+
2021-07-15 02:14:48,032 INFO Thread-8 :641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml
|
221 |
+
2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json
|
222 |
+
2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log
|
223 |
+
2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files
|
224 |
+
2021-07-15 02:14:48,033 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt requirements.txt
|
225 |
+
2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log output.log
|
226 |
+
2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-metadata.json wandb-metadata.json
|
227 |
+
2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml config.yaml
|
228 |
+
2021-07-15 02:14:48,034 INFO SenderThread:641950 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json wandb-summary.json
|
229 |
+
2021-07-15 02:14:48,034 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 6
|
230 |
+
2021-07-15 02:14:48,034 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
231 |
+
2021-07-15 02:14:48,035 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
232 |
+
2021-07-15 02:14:48,035 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 6
|
233 |
+
2021-07-15 02:14:48,036 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
234 |
+
2021-07-15 02:14:48,036 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 6
|
235 |
+
2021-07-15 02:14:48,036 INFO SenderThread:641950 [file_pusher.py:finish():177] shutting down file pusher
|
236 |
+
2021-07-15 02:14:48,137 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
237 |
+
2021-07-15 02:14:48,137 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
238 |
+
2021-07-15 02:14:48,239 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
239 |
+
2021-07-15 02:14:48,240 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
240 |
+
2021-07-15 02:14:48,342 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
241 |
+
2021-07-15 02:14:48,342 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
242 |
+
2021-07-15 02:14:48,444 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
243 |
+
2021-07-15 02:14:48,444 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
244 |
+
2021-07-15 02:14:48,475 INFO Thread-15 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/wandb-summary.json
|
245 |
+
2021-07-15 02:14:48,479 INFO Thread-13 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/output.log
|
246 |
+
2021-07-15 02:14:48,493 INFO Thread-14 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/config.yaml
|
247 |
+
2021-07-15 02:14:48,547 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
248 |
+
2021-07-15 02:14:48,548 INFO Thread-12 :641950 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/files/requirements.txt
|
249 |
+
2021-07-15 02:14:48,548 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
250 |
+
2021-07-15 02:14:48,650 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
251 |
+
2021-07-15 02:14:48,650 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
252 |
+
2021-07-15 02:14:48,749 INFO Thread-7 :641950 [sender.py:transition_state():308] send defer: 7
|
253 |
+
2021-07-15 02:14:48,749 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
254 |
+
2021-07-15 02:14:48,749 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 7
|
255 |
+
2021-07-15 02:14:48,749 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
256 |
+
2021-07-15 02:14:48,750 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 7
|
257 |
+
2021-07-15 02:14:48,752 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
258 |
+
2021-07-15 02:14:49,034 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 8
|
259 |
+
2021-07-15 02:14:49,034 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
260 |
+
2021-07-15 02:14:49,035 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
261 |
+
2021-07-15 02:14:49,035 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 8
|
262 |
+
2021-07-15 02:14:49,035 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
263 |
+
2021-07-15 02:14:49,035 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 8
|
264 |
+
2021-07-15 02:14:49,035 INFO SenderThread:641950 [sender.py:transition_state():308] send defer: 9
|
265 |
+
2021-07-15 02:14:49,036 DEBUG SenderThread:641950 [sender.py:send():179] send: final
|
266 |
+
2021-07-15 02:14:49,036 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: defer
|
267 |
+
2021-07-15 02:14:49,036 INFO HandlerThread:641950 [handler.py:handle_request_defer():141] handle defer: 9
|
268 |
+
2021-07-15 02:14:49,036 DEBUG SenderThread:641950 [sender.py:send():179] send: footer
|
269 |
+
2021-07-15 02:14:49,036 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: defer
|
270 |
+
2021-07-15 02:14:49,036 INFO SenderThread:641950 [sender.py:send_request_defer():304] handle sender defer: 9
|
271 |
+
2021-07-15 02:14:49,137 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: poll_exit
|
272 |
+
2021-07-15 02:14:49,137 DEBUG SenderThread:641950 [sender.py:send_request():193] send_request: poll_exit
|
273 |
+
2021-07-15 02:14:49,137 INFO SenderThread:641950 [file_pusher.py:join():182] waiting for file pusher
|
274 |
+
2021-07-15 02:14:49,139 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: get_summary
|
275 |
+
2021-07-15 02:14:49,139 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: sampled_history
|
276 |
+
2021-07-15 02:14:49,140 DEBUG HandlerThread:641950 [handler.py:handle_request():124] handle_request: shutdown
|
277 |
+
2021-07-15 02:14:49,140 INFO HandlerThread:641950 [handler.py:finish():638] shutting down handler
|
278 |
+
2021-07-15 02:14:50,037 INFO WriterThread:641950 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb
|
279 |
+
2021-07-15 02:14:50,138 INFO SenderThread:641950 [sender.py:finish():945] shutting down sender
|
280 |
+
2021-07-15 02:14:50,138 INFO SenderThread:641950 [file_pusher.py:finish():177] shutting down file pusher
|
281 |
+
2021-07-15 02:14:50,138 INFO SenderThread:641950 [file_pusher.py:join():182] waiting for file pusher
|
282 |
+
2021-07-15 02:14:50,141 INFO MainThread:641950 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210715_020018-3i0mvo08/logs/debug.log
CHANGED
@@ -23,3 +23,97 @@ config: {}
|
|
23 |
2021-07-15 02:00:20,876 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-00-11_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
2021-07-15 02:00:20,878 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
2021-07-15 02:00:20,879 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
2021-07-15 02:00:20,876 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-00-11_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
2021-07-15 02:00:20,878 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
2021-07-15 02:00:20,879 INFO MainThread:640692 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
|
26 |
+
2021-07-15 02:14:44,909 INFO MainThread:640692 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-15 02:14:44,910 INFO MainThread:640692 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-15 02:14:47,306 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1375
|
33 |
+
total_bytes: 1375
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-15 02:14:47,586 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1375
|
41 |
+
total_bytes: 1375
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-15 02:14:48,036 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 3
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1375
|
49 |
+
total_bytes: 5986
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-15 02:14:48,138 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1375
|
57 |
+
total_bytes: 10555
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-15 02:14:48,240 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 10555
|
65 |
+
total_bytes: 10555
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-15 02:14:48,343 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 10555
|
73 |
+
total_bytes: 10555
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-15 02:14:48,445 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 10555
|
81 |
+
total_bytes: 10555
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-15 02:14:48,549 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 10555
|
89 |
+
total_bytes: 10555
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-15 02:14:48,651 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 10555
|
97 |
+
total_bytes: 10555
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-15 02:14:49,035 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 10555
|
105 |
+
total_bytes: 10555
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-15 02:14:49,138 INFO MainThread:640692 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
109 |
+
exit_result {
|
110 |
+
}
|
111 |
+
file_counts {
|
112 |
+
wandb_count: 5
|
113 |
+
}
|
114 |
+
pusher_stats {
|
115 |
+
uploaded_bytes: 10555
|
116 |
+
total_bytes: 10555
|
117 |
+
}
|
118 |
+
|
119 |
+
2021-07-15 02:14:50,442 INFO MainThread:640692 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb
CHANGED
Binary files a/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb and b/wandb/run-20210715_020018-3i0mvo08/run-3i0mvo08.wandb differ
|
|
wandb/run-20210715_021559-38yj0n5v/files/config.yaml
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 20000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 1
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 3.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul15_02-15-50_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 50
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 500
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 1
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: null
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_steps:
|
255 |
+
desc: null
|
256 |
+
value: 20000
|
257 |
+
save_strategy:
|
258 |
+
desc: null
|
259 |
+
value: IntervalStrategy.STEPS
|
260 |
+
save_total_limit:
|
261 |
+
desc: null
|
262 |
+
value: 5
|
263 |
+
seed:
|
264 |
+
desc: null
|
265 |
+
value: 42
|
266 |
+
sharded_ddp:
|
267 |
+
desc: null
|
268 |
+
value: []
|
269 |
+
skip_memory_metrics:
|
270 |
+
desc: null
|
271 |
+
value: true
|
272 |
+
tokenizer_name:
|
273 |
+
desc: null
|
274 |
+
value: ./
|
275 |
+
tpu_metrics_debug:
|
276 |
+
desc: null
|
277 |
+
value: false
|
278 |
+
tpu_num_cores:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
train_ref_file:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
use_fast_tokenizer:
|
285 |
+
desc: null
|
286 |
+
value: true
|
287 |
+
use_legacy_prediction_loop:
|
288 |
+
desc: null
|
289 |
+
value: false
|
290 |
+
validation_ref_file:
|
291 |
+
desc: null
|
292 |
+
value: null
|
293 |
+
validation_split_percentage:
|
294 |
+
desc: null
|
295 |
+
value: 5
|
296 |
+
warmup_ratio:
|
297 |
+
desc: null
|
298 |
+
value: 0.0
|
299 |
+
warmup_steps:
|
300 |
+
desc: null
|
301 |
+
value: 10000
|
302 |
+
weight_decay:
|
303 |
+
desc: null
|
304 |
+
value: 0.0095
|
wandb/run-20210715_021559-38yj0n5v/files/output.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
2 |
+
warnings.warn(
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
|
6 |
+
Training...: 0%| | 0/907114 [02:05<?, ?it/s]
|
7 |
+
Epoch ... (1/5): 0%| | 0/5 [15:48<?, ?it/s]
|
8 |
+
Traceback (most recent call last):
|
9 |
+
File "./run_mlm_flax_no_accum.py", line 699, in <module>
|
10 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
11 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
|
12 |
+
return fun(*args, **kwargs)
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
|
14 |
+
out = pxla.xla_pmap(
|
15 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
|
16 |
+
return call_bind(self, fun, *args, **params)
|
17 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
|
18 |
+
outs = primitive.process(top_trace, fun, tracers, params)
|
19 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
|
20 |
+
return trace.process_map(self, fun, tracers, params)
|
21 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
|
22 |
+
return primitive.impl(f, *tracers, **params)
|
23 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
|
24 |
+
return compiled_fun(*args)
|
25 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
26 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
27 |
+
jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 188.47M free, 0B reserved, and 6.75M reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
28 |
+
The stack trace below excludes JAX-internal frames.
|
29 |
+
The preceding is the original exception that occurred, unmodified.
|
30 |
+
--------------------
|
31 |
+
The above exception was the direct cause of the following exception:
|
32 |
+
Traceback (most recent call last):
|
33 |
+
File "./run_mlm_flax_no_accum.py", line 699, in <module>
|
34 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
35 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
36 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
37 |
+
RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 188.47M free, 0B reserved, and 6.75M reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
wandb/run-20210715_021559-38yj0n5v/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-15T02:16:01.117383",
|
5 |
+
"startedAt": "2021-07-15T02:15:59.045700",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=50",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=20000",
|
26 |
+
"--learning_rate=3e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=500"
|
31 |
+
],
|
32 |
+
"state": "running",
|
33 |
+
"program": "./run_mlm_flax_no_accum.py",
|
34 |
+
"codePath": "run_mlm_flax_no_accum.py",
|
35 |
+
"git": {
|
36 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
37 |
+
"commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303"
|
38 |
+
},
|
39 |
+
"email": null,
|
40 |
+
"root": "/home/dat/pino-roberta-base",
|
41 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
42 |
+
"username": "dat",
|
43 |
+
"executable": "/home/dat/pino/bin/python"
|
44 |
+
}
|
wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log
ADDED
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 02:15:59,792 INFO MainThread:644701 [internal.py:wandb_internal():88] W&B internal server running at pid: 644701, started at: 2021-07-15 02:15:59.792106
|
2 |
+
2021-07-15 02:15:59,795 DEBUG SenderThread:644701 [sender.py:send():179] send: header
|
3 |
+
2021-07-15 02:15:59,795 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: check_version
|
4 |
+
2021-07-15 02:15:59,795 INFO WriterThread:644701 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb
|
5 |
+
2021-07-15 02:15:59,796 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-15 02:15:59,833 DEBUG SenderThread:644701 [sender.py:send():179] send: run
|
7 |
+
2021-07-15 02:16:00,007 INFO SenderThread:644701 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files
|
8 |
+
2021-07-15 02:16:00,007 INFO SenderThread:644701 [sender.py:_start_run_threads():716] run started: 38yj0n5v with start time 1626315359
|
9 |
+
2021-07-15 02:16:00,007 DEBUG SenderThread:644701 [sender.py:send():179] send: summary
|
10 |
+
2021-07-15 02:16:00,008 INFO SenderThread:644701 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-15 02:16:00,008 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-15 02:16:01,010 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json
|
13 |
+
2021-07-15 02:16:01,117 DEBUG HandlerThread:644701 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-15 02:16:01,117 DEBUG HandlerThread:644701 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-15 02:16:01,117 DEBUG HandlerThread:644701 [meta.py:probe():210] probe
|
16 |
+
2021-07-15 02:16:01,118 DEBUG HandlerThread:644701 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-15 02:16:01,149 DEBUG HandlerThread:644701 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-15 02:16:01,150 DEBUG HandlerThread:644701 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-15 02:16:01,150 DEBUG HandlerThread:644701 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-15 02:16:01,150 DEBUG HandlerThread:644701 [meta.py:probe():252] probe done
|
21 |
+
2021-07-15 02:16:01,154 DEBUG SenderThread:644701 [sender.py:send():179] send: files
|
22 |
+
2021-07-15 02:16:01,154 INFO SenderThread:644701 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-15 02:16:01,160 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-15 02:16:01,161 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-15 02:16:01,290 DEBUG SenderThread:644701 [sender.py:send():179] send: config
|
26 |
+
2021-07-15 02:16:01,291 DEBUG SenderThread:644701 [sender.py:send():179] send: config
|
27 |
+
2021-07-15 02:16:01,291 DEBUG SenderThread:644701 [sender.py:send():179] send: config
|
28 |
+
2021-07-15 02:16:01,718 INFO Thread-11 :644701 [upload_job.py:push():137] Uploaded file /tmp/tmp__ipqk3vwandb/1qcixa2k-wandb-metadata.json
|
29 |
+
2021-07-15 02:16:02,009 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
|
30 |
+
2021-07-15 02:16:02,009 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt
|
31 |
+
2021-07-15 02:16:02,009 INFO Thread-8 :644701 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json
|
32 |
+
2021-07-15 02:16:16,015 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
|
33 |
+
2021-07-15 02:16:16,292 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-15 02:16:16,293 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-15 02:16:29,202 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
36 |
+
2021-07-15 02:16:31,021 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml
|
37 |
+
2021-07-15 02:16:31,425 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
38 |
+
2021-07-15 02:16:31,425 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
39 |
+
2021-07-15 02:16:46,555 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
+
2021-07-15 02:16:46,555 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
41 |
+
2021-07-15 02:16:59,284 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
42 |
+
2021-07-15 02:17:01,687 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
43 |
+
2021-07-15 02:17:01,687 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
44 |
+
2021-07-15 02:17:16,819 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
45 |
+
2021-07-15 02:17:16,820 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
46 |
+
2021-07-15 02:17:29,359 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
47 |
+
2021-07-15 02:17:31,951 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
48 |
+
2021-07-15 02:17:31,951 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
49 |
+
2021-07-15 02:17:47,083 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
50 |
+
2021-07-15 02:17:47,083 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
51 |
+
2021-07-15 02:17:59,439 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
52 |
+
2021-07-15 02:18:02,215 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
53 |
+
2021-07-15 02:18:02,215 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
54 |
+
2021-07-15 02:18:17,355 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
55 |
+
2021-07-15 02:18:17,356 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
56 |
+
2021-07-15 02:18:29,519 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
57 |
+
2021-07-15 02:18:32,491 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
58 |
+
2021-07-15 02:18:32,492 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
59 |
+
2021-07-15 02:18:47,624 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
60 |
+
2021-07-15 02:18:47,624 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
61 |
+
2021-07-15 02:18:59,595 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
62 |
+
2021-07-15 02:19:02,759 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
63 |
+
2021-07-15 02:19:02,759 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
64 |
+
2021-07-15 02:19:17,890 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
65 |
+
2021-07-15 02:19:17,890 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
66 |
+
2021-07-15 02:19:29,672 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
67 |
+
2021-07-15 02:19:33,021 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
68 |
+
2021-07-15 02:19:33,022 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
69 |
+
2021-07-15 02:19:48,153 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
70 |
+
2021-07-15 02:19:48,154 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
71 |
+
2021-07-15 02:19:59,751 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
72 |
+
2021-07-15 02:20:03,293 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
73 |
+
2021-07-15 02:20:03,294 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
74 |
+
2021-07-15 02:20:18,425 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
75 |
+
2021-07-15 02:20:18,426 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
76 |
+
2021-07-15 02:20:29,828 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
77 |
+
2021-07-15 02:20:33,560 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
78 |
+
2021-07-15 02:20:33,560 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
79 |
+
2021-07-15 02:20:48,726 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
80 |
+
2021-07-15 02:20:48,726 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
81 |
+
2021-07-15 02:20:59,906 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
82 |
+
2021-07-15 02:21:03,857 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
83 |
+
2021-07-15 02:21:03,858 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
84 |
+
2021-07-15 02:21:18,990 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
85 |
+
2021-07-15 02:21:18,991 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
86 |
+
2021-07-15 02:21:29,980 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
87 |
+
2021-07-15 02:21:34,126 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
88 |
+
2021-07-15 02:21:34,126 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
89 |
+
2021-07-15 02:21:49,258 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
90 |
+
2021-07-15 02:21:49,258 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
91 |
+
2021-07-15 02:22:00,053 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
92 |
+
2021-07-15 02:22:04,390 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
93 |
+
2021-07-15 02:22:04,391 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
94 |
+
2021-07-15 02:22:19,527 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
95 |
+
2021-07-15 02:22:19,527 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
96 |
+
2021-07-15 02:22:30,130 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
97 |
+
2021-07-15 02:22:34,658 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
98 |
+
2021-07-15 02:22:34,658 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
99 |
+
2021-07-15 02:22:49,790 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
100 |
+
2021-07-15 02:22:49,790 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
101 |
+
2021-07-15 02:23:00,206 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
102 |
+
2021-07-15 02:23:04,919 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
103 |
+
2021-07-15 02:23:04,920 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
104 |
+
2021-07-15 02:23:20,062 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
105 |
+
2021-07-15 02:23:20,063 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
106 |
+
2021-07-15 02:23:30,267 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
107 |
+
2021-07-15 02:23:35,199 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
108 |
+
2021-07-15 02:23:35,199 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
109 |
+
2021-07-15 02:23:50,332 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
110 |
+
2021-07-15 02:23:50,332 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
111 |
+
2021-07-15 02:24:00,346 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
112 |
+
2021-07-15 02:24:05,465 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
113 |
+
2021-07-15 02:24:05,466 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
114 |
+
2021-07-15 02:24:20,598 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
115 |
+
2021-07-15 02:24:20,598 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
116 |
+
2021-07-15 02:24:30,424 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
117 |
+
2021-07-15 02:24:35,751 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
118 |
+
2021-07-15 02:24:35,751 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
119 |
+
2021-07-15 02:24:50,888 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
120 |
+
2021-07-15 02:24:50,888 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
121 |
+
2021-07-15 02:25:00,500 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
122 |
+
2021-07-15 02:25:06,021 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
123 |
+
2021-07-15 02:25:06,022 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
124 |
+
2021-07-15 02:25:21,156 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
125 |
+
2021-07-15 02:25:21,157 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
126 |
+
2021-07-15 02:25:30,575 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
127 |
+
2021-07-15 02:25:36,290 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
128 |
+
2021-07-15 02:25:36,291 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
129 |
+
2021-07-15 02:25:51,426 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
130 |
+
2021-07-15 02:25:51,426 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
131 |
+
2021-07-15 02:26:00,654 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
132 |
+
2021-07-15 02:26:06,562 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
133 |
+
2021-07-15 02:26:06,562 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
134 |
+
2021-07-15 02:26:21,692 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
135 |
+
2021-07-15 02:26:21,693 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
136 |
+
2021-07-15 02:26:30,729 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
137 |
+
2021-07-15 02:26:36,825 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
138 |
+
2021-07-15 02:26:36,825 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
139 |
+
2021-07-15 02:26:51,959 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
140 |
+
2021-07-15 02:26:51,959 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
141 |
+
2021-07-15 02:27:00,798 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
142 |
+
2021-07-15 02:27:07,091 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
143 |
+
2021-07-15 02:27:07,091 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
144 |
+
2021-07-15 02:27:22,224 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
145 |
+
2021-07-15 02:27:22,224 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
146 |
+
2021-07-15 02:27:30,870 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
147 |
+
2021-07-15 02:27:37,360 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
148 |
+
2021-07-15 02:27:37,360 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
149 |
+
2021-07-15 02:27:52,491 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
150 |
+
2021-07-15 02:27:52,491 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
151 |
+
2021-07-15 02:28:00,938 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
152 |
+
2021-07-15 02:28:07,622 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
153 |
+
2021-07-15 02:28:07,622 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
154 |
+
2021-07-15 02:28:22,754 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
155 |
+
2021-07-15 02:28:22,755 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
156 |
+
2021-07-15 02:28:31,010 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
157 |
+
2021-07-15 02:28:37,888 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
158 |
+
2021-07-15 02:28:37,888 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
159 |
+
2021-07-15 02:28:53,020 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
160 |
+
2021-07-15 02:28:53,021 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
161 |
+
2021-07-15 02:29:01,085 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
162 |
+
2021-07-15 02:29:08,157 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
163 |
+
2021-07-15 02:29:08,157 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
164 |
+
2021-07-15 02:29:23,289 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
165 |
+
2021-07-15 02:29:23,289 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
166 |
+
2021-07-15 02:29:31,158 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
167 |
+
2021-07-15 02:29:38,420 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
168 |
+
2021-07-15 02:29:38,420 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
169 |
+
2021-07-15 02:29:53,553 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
170 |
+
2021-07-15 02:29:53,553 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
171 |
+
2021-07-15 02:29:58,389 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
|
172 |
+
2021-07-15 02:30:01,235 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
173 |
+
2021-07-15 02:30:08,702 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
174 |
+
2021-07-15 02:30:08,702 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
175 |
+
2021-07-15 02:30:23,843 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
176 |
+
2021-07-15 02:30:23,843 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
177 |
+
2021-07-15 02:30:31,315 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
178 |
+
2021-07-15 02:30:38,973 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
179 |
+
2021-07-15 02:30:38,973 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
180 |
+
2021-07-15 02:30:54,105 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
181 |
+
2021-07-15 02:30:54,106 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
182 |
+
2021-07-15 02:31:01,399 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
183 |
+
2021-07-15 02:31:09,240 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
184 |
+
2021-07-15 02:31:09,240 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
185 |
+
2021-07-15 02:31:24,379 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
186 |
+
2021-07-15 02:31:24,379 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
187 |
+
2021-07-15 02:31:31,480 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
188 |
+
2021-07-15 02:31:39,512 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
189 |
+
2021-07-15 02:31:39,512 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
190 |
+
2021-07-15 02:31:54,644 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: stop_status
|
191 |
+
2021-07-15 02:31:54,644 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: stop_status
|
192 |
+
2021-07-15 02:32:01,553 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
193 |
+
2021-07-15 02:32:04,443 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
|
194 |
+
2021-07-15 02:32:04,474 DEBUG SenderThread:644701 [sender.py:send():179] send: telemetry
|
195 |
+
2021-07-15 02:32:04,474 DEBUG SenderThread:644701 [sender.py:send():179] send: exit
|
196 |
+
2021-07-15 02:32:04,474 INFO SenderThread:644701 [sender.py:send_exit():287] handling exit code: 1
|
197 |
+
2021-07-15 02:32:04,476 INFO SenderThread:644701 [sender.py:send_exit():295] send defer
|
198 |
+
2021-07-15 02:32:04,476 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
199 |
+
2021-07-15 02:32:04,477 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
200 |
+
2021-07-15 02:32:04,477 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
201 |
+
2021-07-15 02:32:04,477 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 0
|
202 |
+
2021-07-15 02:32:04,478 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
203 |
+
2021-07-15 02:32:04,478 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 0
|
204 |
+
2021-07-15 02:32:04,478 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 1
|
205 |
+
2021-07-15 02:32:04,478 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
206 |
+
2021-07-15 02:32:04,478 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 1
|
207 |
+
2021-07-15 02:32:04,561 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
208 |
+
2021-07-15 02:32:04,561 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 1
|
209 |
+
2021-07-15 02:32:04,562 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 2
|
210 |
+
2021-07-15 02:32:04,562 DEBUG SenderThread:644701 [sender.py:send():179] send: stats
|
211 |
+
2021-07-15 02:32:04,562 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
212 |
+
2021-07-15 02:32:04,562 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 2
|
213 |
+
2021-07-15 02:32:04,562 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
214 |
+
2021-07-15 02:32:04,563 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 2
|
215 |
+
2021-07-15 02:32:04,563 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 3
|
216 |
+
2021-07-15 02:32:04,563 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
217 |
+
2021-07-15 02:32:04,563 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 3
|
218 |
+
2021-07-15 02:32:04,563 DEBUG SenderThread:644701 [sender.py:send():179] send: summary
|
219 |
+
2021-07-15 02:32:04,564 INFO SenderThread:644701 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
220 |
+
2021-07-15 02:32:04,564 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
221 |
+
2021-07-15 02:32:04,564 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 3
|
222 |
+
2021-07-15 02:32:04,564 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 4
|
223 |
+
2021-07-15 02:32:04,565 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
224 |
+
2021-07-15 02:32:04,565 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 4
|
225 |
+
2021-07-15 02:32:04,565 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
226 |
+
2021-07-15 02:32:04,565 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 4
|
227 |
+
2021-07-15 02:32:04,580 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
228 |
+
2021-07-15 02:32:04,749 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 5
|
229 |
+
2021-07-15 02:32:04,749 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
230 |
+
2021-07-15 02:32:04,749 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
231 |
+
2021-07-15 02:32:04,750 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 5
|
232 |
+
2021-07-15 02:32:04,750 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
233 |
+
2021-07-15 02:32:04,750 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 5
|
234 |
+
2021-07-15 02:32:04,750 INFO SenderThread:644701 [dir_watcher.py:finish():282] shutting down directory watcher
|
235 |
+
2021-07-15 02:32:04,851 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
236 |
+
2021-07-15 02:32:05,444 INFO Thread-8 :644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
|
237 |
+
2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json
|
238 |
+
2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml
|
239 |
+
2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files
|
240 |
+
2021-07-15 02:32:05,445 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt requirements.txt
|
241 |
+
2021-07-15 02:32:05,446 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log output.log
|
242 |
+
2021-07-15 02:32:05,446 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-metadata.json wandb-metadata.json
|
243 |
+
2021-07-15 02:32:05,446 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml config.yaml
|
244 |
+
2021-07-15 02:32:05,450 INFO SenderThread:644701 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json wandb-summary.json
|
245 |
+
2021-07-15 02:32:05,453 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 6
|
246 |
+
2021-07-15 02:32:05,453 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
247 |
+
2021-07-15 02:32:05,455 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
248 |
+
2021-07-15 02:32:05,457 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 6
|
249 |
+
2021-07-15 02:32:05,458 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
250 |
+
2021-07-15 02:32:05,461 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 6
|
251 |
+
2021-07-15 02:32:05,461 INFO SenderThread:644701 [file_pusher.py:finish():177] shutting down file pusher
|
252 |
+
2021-07-15 02:32:05,556 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
253 |
+
2021-07-15 02:32:05,556 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
254 |
+
2021-07-15 02:32:05,658 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
255 |
+
2021-07-15 02:32:05,659 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
256 |
+
2021-07-15 02:32:05,761 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
257 |
+
2021-07-15 02:32:05,761 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
258 |
+
2021-07-15 02:32:05,863 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
259 |
+
2021-07-15 02:32:05,863 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
260 |
+
2021-07-15 02:32:05,888 INFO Thread-15 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/wandb-summary.json
|
261 |
+
2021-07-15 02:32:05,892 INFO Thread-13 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/output.log
|
262 |
+
2021-07-15 02:32:05,894 INFO Thread-14 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/config.yaml
|
263 |
+
2021-07-15 02:32:05,895 INFO Thread-12 :644701 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/files/requirements.txt
|
264 |
+
2021-07-15 02:32:05,965 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
265 |
+
2021-07-15 02:32:05,965 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
266 |
+
2021-07-15 02:32:06,067 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
267 |
+
2021-07-15 02:32:06,067 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
268 |
+
2021-07-15 02:32:06,096 INFO Thread-7 :644701 [sender.py:transition_state():308] send defer: 7
|
269 |
+
2021-07-15 02:32:06,096 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
270 |
+
2021-07-15 02:32:06,096 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 7
|
271 |
+
2021-07-15 02:32:06,097 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
272 |
+
2021-07-15 02:32:06,097 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 7
|
273 |
+
2021-07-15 02:32:06,169 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
274 |
+
2021-07-15 02:32:06,370 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 8
|
275 |
+
2021-07-15 02:32:06,370 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
276 |
+
2021-07-15 02:32:06,371 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
277 |
+
2021-07-15 02:32:06,371 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 8
|
278 |
+
2021-07-15 02:32:06,371 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
279 |
+
2021-07-15 02:32:06,371 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 8
|
280 |
+
2021-07-15 02:32:06,371 INFO SenderThread:644701 [sender.py:transition_state():308] send defer: 9
|
281 |
+
2021-07-15 02:32:06,372 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: defer
|
282 |
+
2021-07-15 02:32:06,372 INFO HandlerThread:644701 [handler.py:handle_request_defer():141] handle defer: 9
|
283 |
+
2021-07-15 02:32:06,372 DEBUG SenderThread:644701 [sender.py:send():179] send: final
|
284 |
+
2021-07-15 02:32:06,372 DEBUG SenderThread:644701 [sender.py:send():179] send: footer
|
285 |
+
2021-07-15 02:32:06,372 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: defer
|
286 |
+
2021-07-15 02:32:06,372 INFO SenderThread:644701 [sender.py:send_request_defer():304] handle sender defer: 9
|
287 |
+
2021-07-15 02:32:06,472 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: poll_exit
|
288 |
+
2021-07-15 02:32:06,472 DEBUG SenderThread:644701 [sender.py:send_request():193] send_request: poll_exit
|
289 |
+
2021-07-15 02:32:06,473 INFO SenderThread:644701 [file_pusher.py:join():182] waiting for file pusher
|
290 |
+
2021-07-15 02:32:06,474 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: get_summary
|
291 |
+
2021-07-15 02:32:06,475 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: sampled_history
|
292 |
+
2021-07-15 02:32:06,475 DEBUG HandlerThread:644701 [handler.py:handle_request():124] handle_request: shutdown
|
293 |
+
2021-07-15 02:32:06,475 INFO HandlerThread:644701 [handler.py:finish():638] shutting down handler
|
294 |
+
2021-07-15 02:32:07,372 INFO WriterThread:644701 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb
|
295 |
+
2021-07-15 02:32:07,473 INFO SenderThread:644701 [sender.py:finish():945] shutting down sender
|
296 |
+
2021-07-15 02:32:07,473 INFO SenderThread:644701 [file_pusher.py:finish():177] shutting down file pusher
|
297 |
+
2021-07-15 02:32:07,473 INFO SenderThread:644701 [file_pusher.py:join():182] waiting for file pusher
|
298 |
+
2021-07-15 02:32:07,477 INFO MainThread:644701 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210715_021559-38yj0n5v/logs/debug.log
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/logs/debug.log
|
4 |
+
2021-07-15 02:15:59,047 INFO MainThread:643445 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_021559-38yj0n5v/logs/debug-internal.log
|
5 |
+
2021-07-15 02:15:59,048 INFO MainThread:643445 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-15 02:15:59,048 INFO MainThread:643445 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-15 02:15:59,048 INFO MainThread:643445 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-15 02:15:59,048 INFO MainThread:643445 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-15 02:15:59,097 INFO MainThread:643445 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-15 02:15:59,145 INFO MainThread:643445 [backend.py:ensure_launched():139] started backend process with pid: 644701
|
12 |
+
2021-07-15 02:15:59,147 INFO MainThread:643445 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-15 02:15:59,150 INFO MainThread:643445 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-15 02:15:59,151 INFO MainThread:643445 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-15 02:15:59,832 INFO MainThread:643445 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-15 02:15:59,832 INFO MainThread:643445 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-15 02:16:00,007 INFO MainThread:643445 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-15 02:16:01,157 INFO MainThread:643445 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-15 02:16:01,158 INFO MainThread:643445 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-15 02:16:01,158 INFO MainThread:643445 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-15 02:16:01,160 INFO MainThread:643445 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-15 02:16:01,160 INFO MainThread:643445 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-15 02:16:01,168 INFO MainThread:643445 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-15-50_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 20000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-15 02:16:01,170 INFO MainThread:643445 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
+
2021-07-15 02:16:01,171 INFO MainThread:643445 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
|
26 |
+
2021-07-15 02:32:02,250 INFO MainThread:643445 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-15 02:32:02,251 INFO MainThread:643445 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-15 02:32:04,478 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1375
|
33 |
+
total_bytes: 1375
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-15 02:32:04,750 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1375
|
41 |
+
total_bytes: 1375
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-15 02:32:05,454 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 3
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1375
|
49 |
+
total_bytes: 6341
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-15 02:32:05,557 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1375
|
57 |
+
total_bytes: 10910
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-15 02:32:05,659 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 10910
|
65 |
+
total_bytes: 10910
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-15 02:32:05,761 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 10910
|
73 |
+
total_bytes: 10910
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-15 02:32:05,864 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 10910
|
81 |
+
total_bytes: 10910
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-15 02:32:05,966 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 10910
|
89 |
+
total_bytes: 10910
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-15 02:32:06,068 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 10910
|
97 |
+
total_bytes: 10910
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-15 02:32:06,371 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 10910
|
105 |
+
total_bytes: 10910
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-15 02:32:06,473 INFO MainThread:643445 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
109 |
+
exit_result {
|
110 |
+
}
|
111 |
+
file_counts {
|
112 |
+
wandb_count: 5
|
113 |
+
}
|
114 |
+
pusher_stats {
|
115 |
+
uploaded_bytes: 10910
|
116 |
+
total_bytes: 10910
|
117 |
+
}
|
118 |
+
|
119 |
+
2021-07-15 02:32:07,796 INFO MainThread:643445 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210715_021559-38yj0n5v/run-38yj0n5v.wandb
ADDED
Binary file (14.5 kB). View file
|
|
wandb/run-20210715_023352-28io0kfl/files/config.yaml
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 20000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 1
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 3.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul15_02-33-44_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 50
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 500
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 1
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: null
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_steps:
|
255 |
+
desc: null
|
256 |
+
value: 40000
|
257 |
+
save_strategy:
|
258 |
+
desc: null
|
259 |
+
value: IntervalStrategy.STEPS
|
260 |
+
save_total_limit:
|
261 |
+
desc: null
|
262 |
+
value: 5
|
263 |
+
seed:
|
264 |
+
desc: null
|
265 |
+
value: 42
|
266 |
+
sharded_ddp:
|
267 |
+
desc: null
|
268 |
+
value: []
|
269 |
+
skip_memory_metrics:
|
270 |
+
desc: null
|
271 |
+
value: true
|
272 |
+
tokenizer_name:
|
273 |
+
desc: null
|
274 |
+
value: ./
|
275 |
+
tpu_metrics_debug:
|
276 |
+
desc: null
|
277 |
+
value: false
|
278 |
+
tpu_num_cores:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
train_ref_file:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
use_fast_tokenizer:
|
285 |
+
desc: null
|
286 |
+
value: true
|
287 |
+
use_legacy_prediction_loop:
|
288 |
+
desc: null
|
289 |
+
value: false
|
290 |
+
validation_ref_file:
|
291 |
+
desc: null
|
292 |
+
value: null
|
293 |
+
validation_split_percentage:
|
294 |
+
desc: null
|
295 |
+
value: 5
|
296 |
+
warmup_ratio:
|
297 |
+
desc: null
|
298 |
+
value: 0.0
|
299 |
+
warmup_steps:
|
300 |
+
desc: null
|
301 |
+
value: 10000
|
302 |
+
weight_decay:
|
303 |
+
desc: null
|
304 |
+
value: 0.0095
|
wandb/run-20210715_023352-28io0kfl/files/output.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
2 |
+
warnings.warn(
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
|
6 |
+
Training...: 0%| | 0/705533 [02:06<?, ?it/s]
|
7 |
+
Epoch ... (1/5): 0%| | 0/5 [12:48<?, ?it/s]
|
8 |
+
Traceback (most recent call last):
|
9 |
+
File "./run_mlm_flax_no_accum.py", line 699, in <module>
|
10 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
11 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
|
12 |
+
return fun(*args, **kwargs)
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
|
14 |
+
out = pxla.xla_pmap(
|
15 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
|
16 |
+
return call_bind(self, fun, *args, **params)
|
17 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
|
18 |
+
outs = primitive.process(top_trace, fun, tracers, params)
|
19 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
|
20 |
+
return trace.process_map(self, fun, tracers, params)
|
21 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
|
22 |
+
return primitive.impl(f, *tracers, **params)
|
23 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
|
24 |
+
return compiled_fun(*args)
|
25 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
26 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
27 |
+
jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 3.27G free, 0B reserved, and 3.22G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
28 |
+
The stack trace below excludes JAX-internal frames.
|
29 |
+
The preceding is the original exception that occurred, unmodified.
|
30 |
+
--------------------
|
31 |
+
The above exception was the direct cause of the following exception:
|
32 |
+
Traceback (most recent call last):
|
33 |
+
File "./run_mlm_flax_no_accum.py", line 699, in <module>
|
34 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
35 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
36 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
37 |
+
RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 3.27G free, 0B reserved, and 3.22G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
wandb/run-20210715_023352-28io0kfl/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-15T02:33:54.743234",
|
5 |
+
"startedAt": "2021-07-15T02:33:52.730317",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=50",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=40000",
|
26 |
+
"--learning_rate=3e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=500"
|
31 |
+
],
|
32 |
+
"state": "running",
|
33 |
+
"program": "./run_mlm_flax_no_accum.py",
|
34 |
+
"codePath": "run_mlm_flax_no_accum.py",
|
35 |
+
"git": {
|
36 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
37 |
+
"commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303"
|
38 |
+
},
|
39 |
+
"email": null,
|
40 |
+
"root": "/home/dat/pino-roberta-base",
|
41 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
42 |
+
"username": "dat",
|
43 |
+
"executable": "/home/dat/pino/bin/python"
|
44 |
+
}
|
wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log
ADDED
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 02:33:53,426 INFO MainThread:647413 [internal.py:wandb_internal():88] W&B internal server running at pid: 647413, started at: 2021-07-15 02:33:53.426396
|
2 |
+
2021-07-15 02:33:53,428 INFO WriterThread:647413 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb
|
3 |
+
2021-07-15 02:33:53,429 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: check_version
|
4 |
+
2021-07-15 02:33:53,430 DEBUG SenderThread:647413 [sender.py:send():179] send: header
|
5 |
+
2021-07-15 02:33:53,430 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-15 02:33:53,471 DEBUG SenderThread:647413 [sender.py:send():179] send: run
|
7 |
+
2021-07-15 02:33:53,641 INFO SenderThread:647413 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files
|
8 |
+
2021-07-15 02:33:53,641 INFO SenderThread:647413 [sender.py:_start_run_threads():716] run started: 28io0kfl with start time 1626316432
|
9 |
+
2021-07-15 02:33:53,641 DEBUG SenderThread:647413 [sender.py:send():179] send: summary
|
10 |
+
2021-07-15 02:33:53,641 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: run_start
|
11 |
+
2021-07-15 02:33:53,642 INFO SenderThread:647413 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
12 |
+
2021-07-15 02:33:54,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json
|
13 |
+
2021-07-15 02:33:54,742 DEBUG HandlerThread:647413 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-15 02:33:54,743 DEBUG HandlerThread:647413 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-15 02:33:54,743 DEBUG HandlerThread:647413 [meta.py:probe():210] probe
|
16 |
+
2021-07-15 02:33:54,744 DEBUG HandlerThread:647413 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-15 02:33:54,775 DEBUG HandlerThread:647413 [meta.py:probe():252] probe done
|
21 |
+
2021-07-15 02:33:54,778 DEBUG SenderThread:647413 [sender.py:send():179] send: files
|
22 |
+
2021-07-15 02:33:54,779 INFO SenderThread:647413 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-15 02:33:54,786 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-15 02:33:54,787 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-15 02:33:54,917 DEBUG SenderThread:647413 [sender.py:send():179] send: config
|
26 |
+
2021-07-15 02:33:54,917 DEBUG SenderThread:647413 [sender.py:send():179] send: config
|
27 |
+
2021-07-15 02:33:54,917 DEBUG SenderThread:647413 [sender.py:send():179] send: config
|
28 |
+
2021-07-15 02:33:55,232 INFO Thread-11 :647413 [upload_job.py:push():137] Uploaded file /tmp/tmp3vyhbjkzwandb/34s07tos-wandb-metadata.json
|
29 |
+
2021-07-15 02:33:55,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/requirements.txt
|
30 |
+
2021-07-15 02:33:55,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json
|
31 |
+
2021-07-15 02:33:55,643 INFO Thread-8 :647413 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
|
32 |
+
2021-07-15 02:34:09,649 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
|
33 |
+
2021-07-15 02:34:09,919 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-15 02:34:09,919 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-15 02:34:22,827 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
36 |
+
2021-07-15 02:34:24,656 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml
|
37 |
+
2021-07-15 02:34:25,052 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
38 |
+
2021-07-15 02:34:25,052 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
39 |
+
2021-07-15 02:34:40,185 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
+
2021-07-15 02:34:40,186 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
41 |
+
2021-07-15 02:34:52,904 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
42 |
+
2021-07-15 02:34:55,321 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
43 |
+
2021-07-15 02:34:55,321 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
44 |
+
2021-07-15 02:35:10,455 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
45 |
+
2021-07-15 02:35:10,455 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
46 |
+
2021-07-15 02:35:22,977 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
47 |
+
2021-07-15 02:35:25,587 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
48 |
+
2021-07-15 02:35:25,587 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
49 |
+
2021-07-15 02:35:40,721 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
50 |
+
2021-07-15 02:35:40,722 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
51 |
+
2021-07-15 02:35:53,062 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
52 |
+
2021-07-15 02:35:55,856 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
53 |
+
2021-07-15 02:35:55,856 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
54 |
+
2021-07-15 02:36:10,989 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
55 |
+
2021-07-15 02:36:10,990 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
56 |
+
2021-07-15 02:36:23,136 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
57 |
+
2021-07-15 02:36:26,122 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
58 |
+
2021-07-15 02:36:26,123 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
59 |
+
2021-07-15 02:36:41,256 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
60 |
+
2021-07-15 02:36:41,257 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
61 |
+
2021-07-15 02:36:53,204 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
62 |
+
2021-07-15 02:36:56,393 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
63 |
+
2021-07-15 02:36:56,394 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
64 |
+
2021-07-15 02:37:11,526 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
65 |
+
2021-07-15 02:37:11,526 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
66 |
+
2021-07-15 02:37:23,277 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
67 |
+
2021-07-15 02:37:26,659 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
68 |
+
2021-07-15 02:37:26,659 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
69 |
+
2021-07-15 02:37:41,793 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
70 |
+
2021-07-15 02:37:41,793 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
71 |
+
2021-07-15 02:37:53,344 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
72 |
+
2021-07-15 02:37:56,927 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
73 |
+
2021-07-15 02:37:56,928 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
74 |
+
2021-07-15 02:38:12,060 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
75 |
+
2021-07-15 02:38:12,060 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
76 |
+
2021-07-15 02:38:23,410 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
77 |
+
2021-07-15 02:38:27,194 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
78 |
+
2021-07-15 02:38:27,194 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
79 |
+
2021-07-15 02:38:42,326 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
80 |
+
2021-07-15 02:38:42,326 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
81 |
+
2021-07-15 02:38:53,475 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
82 |
+
2021-07-15 02:38:57,457 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
83 |
+
2021-07-15 02:38:57,457 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
84 |
+
2021-07-15 02:39:12,589 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
85 |
+
2021-07-15 02:39:12,589 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
86 |
+
2021-07-15 02:39:23,542 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
87 |
+
2021-07-15 02:39:27,728 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
88 |
+
2021-07-15 02:39:27,728 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
89 |
+
2021-07-15 02:39:42,860 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
90 |
+
2021-07-15 02:39:42,860 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
91 |
+
2021-07-15 02:39:53,613 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
92 |
+
2021-07-15 02:39:57,993 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
93 |
+
2021-07-15 02:39:57,994 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
94 |
+
2021-07-15 02:40:13,128 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
95 |
+
2021-07-15 02:40:13,128 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
96 |
+
2021-07-15 02:40:23,681 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
97 |
+
2021-07-15 02:40:28,265 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
98 |
+
2021-07-15 02:40:28,266 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
99 |
+
2021-07-15 02:40:43,401 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
100 |
+
2021-07-15 02:40:43,401 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
101 |
+
2021-07-15 02:40:53,753 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
102 |
+
2021-07-15 02:40:58,548 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
103 |
+
2021-07-15 02:40:58,549 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
104 |
+
2021-07-15 02:41:13,683 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
105 |
+
2021-07-15 02:41:13,684 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
106 |
+
2021-07-15 02:41:23,828 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
107 |
+
2021-07-15 02:41:28,827 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
108 |
+
2021-07-15 02:41:28,827 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
109 |
+
2021-07-15 02:41:43,958 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
110 |
+
2021-07-15 02:41:43,958 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
111 |
+
2021-07-15 02:41:53,904 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
112 |
+
2021-07-15 02:41:59,090 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
113 |
+
2021-07-15 02:41:59,091 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
114 |
+
2021-07-15 02:42:14,225 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
115 |
+
2021-07-15 02:42:14,225 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
116 |
+
2021-07-15 02:42:23,978 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
117 |
+
2021-07-15 02:42:31,120 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
118 |
+
2021-07-15 02:42:31,120 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
119 |
+
2021-07-15 02:42:46,253 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
120 |
+
2021-07-15 02:42:46,253 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
121 |
+
2021-07-15 02:42:54,050 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
122 |
+
2021-07-15 02:43:01,385 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
123 |
+
2021-07-15 02:43:01,385 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
124 |
+
2021-07-15 02:43:16,523 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
125 |
+
2021-07-15 02:43:16,524 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
126 |
+
2021-07-15 02:43:24,121 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
127 |
+
2021-07-15 02:43:31,656 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
128 |
+
2021-07-15 02:43:31,657 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
129 |
+
2021-07-15 02:43:46,789 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
130 |
+
2021-07-15 02:43:46,790 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
131 |
+
2021-07-15 02:43:54,190 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
132 |
+
2021-07-15 02:44:01,924 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
133 |
+
2021-07-15 02:44:01,925 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
134 |
+
2021-07-15 02:44:17,056 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
135 |
+
2021-07-15 02:44:17,057 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
136 |
+
2021-07-15 02:44:24,264 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
137 |
+
2021-07-15 02:44:32,190 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
138 |
+
2021-07-15 02:44:32,190 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
139 |
+
2021-07-15 02:44:47,325 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
140 |
+
2021-07-15 02:44:47,326 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
141 |
+
2021-07-15 02:44:51,894 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
|
142 |
+
2021-07-15 02:44:54,337 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
143 |
+
2021-07-15 02:45:02,471 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
144 |
+
2021-07-15 02:45:02,472 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
145 |
+
2021-07-15 02:45:17,619 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
146 |
+
2021-07-15 02:45:17,619 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
147 |
+
2021-07-15 02:45:24,415 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
148 |
+
2021-07-15 02:45:32,753 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
149 |
+
2021-07-15 02:45:32,754 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
150 |
+
2021-07-15 02:45:47,896 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
151 |
+
2021-07-15 02:45:47,897 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
152 |
+
2021-07-15 02:45:54,500 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
153 |
+
2021-07-15 02:46:03,028 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
154 |
+
2021-07-15 02:46:03,028 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
155 |
+
2021-07-15 02:46:18,161 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
156 |
+
2021-07-15 02:46:18,162 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
157 |
+
2021-07-15 02:46:24,580 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
158 |
+
2021-07-15 02:46:33,296 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
159 |
+
2021-07-15 02:46:33,297 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
160 |
+
2021-07-15 02:46:48,441 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: stop_status
|
161 |
+
2021-07-15 02:46:48,441 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: stop_status
|
162 |
+
2021-07-15 02:46:54,662 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
163 |
+
2021-07-15 02:46:57,942 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
|
164 |
+
2021-07-15 02:46:58,807 DEBUG SenderThread:647413 [sender.py:send():179] send: telemetry
|
165 |
+
2021-07-15 02:46:58,807 DEBUG SenderThread:647413 [sender.py:send():179] send: exit
|
166 |
+
2021-07-15 02:46:58,807 INFO SenderThread:647413 [sender.py:send_exit():287] handling exit code: 1
|
167 |
+
2021-07-15 02:46:58,809 INFO SenderThread:647413 [sender.py:send_exit():295] send defer
|
168 |
+
2021-07-15 02:46:58,809 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
169 |
+
2021-07-15 02:46:58,809 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
170 |
+
2021-07-15 02:46:58,809 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 0
|
171 |
+
2021-07-15 02:46:58,810 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
172 |
+
2021-07-15 02:46:58,810 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
173 |
+
2021-07-15 02:46:58,810 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 0
|
174 |
+
2021-07-15 02:46:58,810 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 1
|
175 |
+
2021-07-15 02:46:58,811 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
176 |
+
2021-07-15 02:46:58,811 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 1
|
177 |
+
2021-07-15 02:46:58,873 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
178 |
+
2021-07-15 02:46:58,873 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 1
|
179 |
+
2021-07-15 02:46:58,873 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 2
|
180 |
+
2021-07-15 02:46:58,874 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
181 |
+
2021-07-15 02:46:58,874 DEBUG SenderThread:647413 [sender.py:send():179] send: stats
|
182 |
+
2021-07-15 02:46:58,874 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 2
|
183 |
+
2021-07-15 02:46:58,875 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
184 |
+
2021-07-15 02:46:58,875 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 2
|
185 |
+
2021-07-15 02:46:58,875 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 3
|
186 |
+
2021-07-15 02:46:58,875 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
187 |
+
2021-07-15 02:46:58,876 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 3
|
188 |
+
2021-07-15 02:46:58,876 DEBUG SenderThread:647413 [sender.py:send():179] send: summary
|
189 |
+
2021-07-15 02:46:58,876 INFO SenderThread:647413 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
190 |
+
2021-07-15 02:46:58,877 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
191 |
+
2021-07-15 02:46:58,877 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 3
|
192 |
+
2021-07-15 02:46:58,877 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 4
|
193 |
+
2021-07-15 02:46:58,877 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
194 |
+
2021-07-15 02:46:58,877 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 4
|
195 |
+
2021-07-15 02:46:58,877 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
196 |
+
2021-07-15 02:46:58,877 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 4
|
197 |
+
2021-07-15 02:46:58,913 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
198 |
+
2021-07-15 02:46:58,943 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json
|
199 |
+
2021-07-15 02:46:58,943 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
|
200 |
+
2021-07-15 02:46:59,055 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 5
|
201 |
+
2021-07-15 02:46:59,055 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
202 |
+
2021-07-15 02:46:59,055 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
203 |
+
2021-07-15 02:46:59,055 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 5
|
204 |
+
2021-07-15 02:46:59,056 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
205 |
+
2021-07-15 02:46:59,056 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 5
|
206 |
+
2021-07-15 02:46:59,056 INFO SenderThread:647413 [dir_watcher.py:finish():282] shutting down directory watcher
|
207 |
+
2021-07-15 02:46:59,157 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
208 |
+
2021-07-15 02:46:59,943 INFO Thread-8 :647413 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml
|
209 |
+
2021-07-15 02:46:59,944 INFO SenderThread:647413 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files
|
210 |
+
2021-07-15 02:46:59,944 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/requirements.txt requirements.txt
|
211 |
+
2021-07-15 02:46:59,944 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log output.log
|
212 |
+
2021-07-15 02:46:59,945 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-metadata.json wandb-metadata.json
|
213 |
+
2021-07-15 02:46:59,945 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml config.yaml
|
214 |
+
2021-07-15 02:46:59,948 INFO SenderThread:647413 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json wandb-summary.json
|
215 |
+
2021-07-15 02:46:59,951 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 6
|
216 |
+
2021-07-15 02:46:59,951 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
217 |
+
2021-07-15 02:46:59,952 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
218 |
+
2021-07-15 02:46:59,953 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 6
|
219 |
+
2021-07-15 02:46:59,956 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
220 |
+
2021-07-15 02:46:59,956 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 6
|
221 |
+
2021-07-15 02:46:59,956 INFO SenderThread:647413 [file_pusher.py:finish():177] shutting down file pusher
|
222 |
+
2021-07-15 02:47:00,054 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
223 |
+
2021-07-15 02:47:00,054 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
224 |
+
2021-07-15 02:47:00,157 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
225 |
+
2021-07-15 02:47:00,157 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
226 |
+
2021-07-15 02:47:00,259 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
227 |
+
2021-07-15 02:47:00,259 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
228 |
+
2021-07-15 02:47:00,361 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
229 |
+
2021-07-15 02:47:00,362 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
230 |
+
2021-07-15 02:47:00,377 INFO Thread-14 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/config.yaml
|
231 |
+
2021-07-15 02:47:00,382 INFO Thread-12 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/requirements.txt
|
232 |
+
2021-07-15 02:47:00,415 INFO Thread-15 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/wandb-summary.json
|
233 |
+
2021-07-15 02:47:00,439 INFO Thread-13 :647413 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/files/output.log
|
234 |
+
2021-07-15 02:47:00,464 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
235 |
+
2021-07-15 02:47:00,464 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
236 |
+
2021-07-15 02:47:00,566 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
237 |
+
2021-07-15 02:47:00,566 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
238 |
+
2021-07-15 02:47:00,640 INFO Thread-7 :647413 [sender.py:transition_state():308] send defer: 7
|
239 |
+
2021-07-15 02:47:00,640 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
240 |
+
2021-07-15 02:47:00,640 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 7
|
241 |
+
2021-07-15 02:47:00,641 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
242 |
+
2021-07-15 02:47:00,641 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 7
|
243 |
+
2021-07-15 02:47:00,668 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
244 |
+
2021-07-15 02:47:00,919 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 8
|
245 |
+
2021-07-15 02:47:00,920 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
246 |
+
2021-07-15 02:47:00,920 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
247 |
+
2021-07-15 02:47:00,920 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 8
|
248 |
+
2021-07-15 02:47:00,921 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
249 |
+
2021-07-15 02:47:00,921 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 8
|
250 |
+
2021-07-15 02:47:00,921 INFO SenderThread:647413 [sender.py:transition_state():308] send defer: 9
|
251 |
+
2021-07-15 02:47:00,921 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: defer
|
252 |
+
2021-07-15 02:47:00,921 INFO HandlerThread:647413 [handler.py:handle_request_defer():141] handle defer: 9
|
253 |
+
2021-07-15 02:47:00,921 DEBUG SenderThread:647413 [sender.py:send():179] send: final
|
254 |
+
2021-07-15 02:47:00,922 DEBUG SenderThread:647413 [sender.py:send():179] send: footer
|
255 |
+
2021-07-15 02:47:00,922 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: defer
|
256 |
+
2021-07-15 02:47:00,922 INFO SenderThread:647413 [sender.py:send_request_defer():304] handle sender defer: 9
|
257 |
+
2021-07-15 02:47:01,022 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: poll_exit
|
258 |
+
2021-07-15 02:47:01,022 DEBUG SenderThread:647413 [sender.py:send_request():193] send_request: poll_exit
|
259 |
+
2021-07-15 02:47:01,022 INFO SenderThread:647413 [file_pusher.py:join():182] waiting for file pusher
|
260 |
+
2021-07-15 02:47:01,024 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: get_summary
|
261 |
+
2021-07-15 02:47:01,024 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: sampled_history
|
262 |
+
2021-07-15 02:47:01,025 DEBUG HandlerThread:647413 [handler.py:handle_request():124] handle_request: shutdown
|
263 |
+
2021-07-15 02:47:01,025 INFO HandlerThread:647413 [handler.py:finish():638] shutting down handler
|
264 |
+
2021-07-15 02:47:01,922 INFO WriterThread:647413 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb
|
265 |
+
2021-07-15 02:47:02,023 INFO SenderThread:647413 [sender.py:finish():945] shutting down sender
|
266 |
+
2021-07-15 02:47:02,023 INFO SenderThread:647413 [file_pusher.py:finish():177] shutting down file pusher
|
267 |
+
2021-07-15 02:47:02,023 INFO SenderThread:647413 [file_pusher.py:join():182] waiting for file pusher
|
268 |
+
2021-07-15 02:47:02,026 INFO MainThread:647413 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210715_023352-28io0kfl/logs/debug.log
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 02:33:52,731 INFO MainThread:646155 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/logs/debug.log
|
4 |
+
2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_023352-28io0kfl/logs/debug-internal.log
|
5 |
+
2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-15 02:33:52,732 INFO MainThread:646155 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-15 02:33:52,732 INFO MainThread:646155 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-15 02:33:52,780 INFO MainThread:646155 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-15 02:33:52,826 INFO MainThread:646155 [backend.py:ensure_launched():139] started backend process with pid: 647413
|
12 |
+
2021-07-15 02:33:52,828 INFO MainThread:646155 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-15 02:33:52,831 INFO MainThread:646155 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-15 02:33:52,832 INFO MainThread:646155 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-15 02:33:53,470 INFO MainThread:646155 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-15 02:33:53,470 INFO MainThread:646155 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-15 02:33:53,641 INFO MainThread:646155 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-15 02:33:54,781 INFO MainThread:646155 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-15 02:33:54,782 INFO MainThread:646155 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-15 02:33:54,783 INFO MainThread:646155 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-15 02:33:54,784 INFO MainThread:646155 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-15 02:33:54,785 INFO MainThread:646155 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-15 02:33:54,790 INFO MainThread:646155 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-33-44_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 40000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-15 02:33:54,792 INFO MainThread:646155 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
+
2021-07-15 02:33:54,793 INFO MainThread:646155 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
|
26 |
+
2021-07-15 02:46:56,604 INFO MainThread:646155 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-15 02:46:56,605 INFO MainThread:646155 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-15 02:46:58,811 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1375
|
33 |
+
total_bytes: 1375
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-15 02:46:59,056 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1375
|
41 |
+
total_bytes: 1375
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-15 02:46:59,953 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 4
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1375
|
49 |
+
total_bytes: 10904
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-15 02:47:00,055 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1375
|
57 |
+
total_bytes: 10906
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-15 02:47:00,158 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 10906
|
65 |
+
total_bytes: 10906
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-15 02:47:00,260 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 10906
|
73 |
+
total_bytes: 10906
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-15 02:47:00,362 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 10906
|
81 |
+
total_bytes: 10906
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-15 02:47:00,465 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 10906
|
89 |
+
total_bytes: 10906
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-15 02:47:00,567 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 10906
|
97 |
+
total_bytes: 10906
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-15 02:47:00,920 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 10906
|
105 |
+
total_bytes: 10906
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-15 02:47:01,023 INFO MainThread:646155 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
109 |
+
exit_result {
|
110 |
+
}
|
111 |
+
file_counts {
|
112 |
+
wandb_count: 5
|
113 |
+
}
|
114 |
+
pusher_stats {
|
115 |
+
uploaded_bytes: 10906
|
116 |
+
total_bytes: 10906
|
117 |
+
}
|
118 |
+
|
119 |
+
2021-07-15 02:47:02,337 INFO MainThread:646155 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210715_023352-28io0kfl/run-28io0kfl.wandb
ADDED
Binary file (13 kB). View file
|
|
wandb/run-20210715_024816-39ztwpif/files/config.yaml
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 20000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 1
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 3.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul15_02-48-08_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 50
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 500
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 1
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: null
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_steps:
|
255 |
+
desc: null
|
256 |
+
value: 40000
|
257 |
+
save_strategy:
|
258 |
+
desc: null
|
259 |
+
value: IntervalStrategy.STEPS
|
260 |
+
save_total_limit:
|
261 |
+
desc: null
|
262 |
+
value: 5
|
263 |
+
seed:
|
264 |
+
desc: null
|
265 |
+
value: 42
|
266 |
+
sharded_ddp:
|
267 |
+
desc: null
|
268 |
+
value: []
|
269 |
+
skip_memory_metrics:
|
270 |
+
desc: null
|
271 |
+
value: true
|
272 |
+
tokenizer_name:
|
273 |
+
desc: null
|
274 |
+
value: ./
|
275 |
+
tpu_metrics_debug:
|
276 |
+
desc: null
|
277 |
+
value: false
|
278 |
+
tpu_num_cores:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
train_ref_file:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
use_fast_tokenizer:
|
285 |
+
desc: null
|
286 |
+
value: true
|
287 |
+
use_legacy_prediction_loop:
|
288 |
+
desc: null
|
289 |
+
value: false
|
290 |
+
validation_ref_file:
|
291 |
+
desc: null
|
292 |
+
value: null
|
293 |
+
validation_split_percentage:
|
294 |
+
desc: null
|
295 |
+
value: 5
|
296 |
+
warmup_ratio:
|
297 |
+
desc: null
|
298 |
+
value: 0.0
|
299 |
+
warmup_steps:
|
300 |
+
desc: null
|
301 |
+
value: 10000
|
302 |
+
weight_decay:
|
303 |
+
desc: null
|
304 |
+
value: 0.0095
|
wandb/run-20210715_024816-39ztwpif/files/output.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
2 |
+
warnings.warn(
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
|
6 |
+
Training...: 0%| | 0/503952 [02:05<?, ?it/s]
|
7 |
+
Epoch ... (1/5): 0%| | 0/5 [09:54<?, ?it/s]
|
8 |
+
Traceback (most recent call last):
|
9 |
+
File "./run_mlm_flax_no_accum.py", line 699, in <module>
|
10 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
11 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
|
12 |
+
return fun(*args, **kwargs)
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
|
14 |
+
out = pxla.xla_pmap(
|
15 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
|
16 |
+
return call_bind(self, fun, *args, **params)
|
17 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
|
18 |
+
outs = primitive.process(top_trace, fun, tracers, params)
|
19 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
|
20 |
+
return trace.process_map(self, fun, tracers, params)
|
21 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
|
22 |
+
return primitive.impl(f, *tracers, **params)
|
23 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
|
24 |
+
return compiled_fun(*args)
|
25 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
26 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
27 |
+
jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 6.35G free, 0B reserved, and 6.31G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
28 |
+
The stack trace below excludes JAX-internal frames.
|
29 |
+
The preceding is the original exception that occurred, unmodified.
|
30 |
+
--------------------
|
31 |
+
The above exception was the direct cause of the following exception:
|
32 |
+
Traceback (most recent call last):
|
33 |
+
File "./run_mlm_flax_no_accum.py", line 699, in <module>
|
34 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
35 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
36 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
37 |
+
RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 6.35G free, 0B reserved, and 6.31G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
wandb/run-20210715_024816-39ztwpif/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-15T02:48:18.837710",
|
5 |
+
"startedAt": "2021-07-15T02:48:16.824799",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=50",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=40000",
|
26 |
+
"--learning_rate=3e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=500"
|
31 |
+
],
|
32 |
+
"state": "running",
|
33 |
+
"program": "./run_mlm_flax_no_accum.py",
|
34 |
+
"codePath": "run_mlm_flax_no_accum.py",
|
35 |
+
"git": {
|
36 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
37 |
+
"commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303"
|
38 |
+
},
|
39 |
+
"email": null,
|
40 |
+
"root": "/home/dat/pino-roberta-base",
|
41 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
42 |
+
"username": "dat",
|
43 |
+
"executable": "/home/dat/pino/bin/python"
|
44 |
+
}
|
wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log
ADDED
@@ -0,0 +1,240 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 02:48:17,521 INFO MainThread:649905 [internal.py:wandb_internal():88] W&B internal server running at pid: 649905, started at: 2021-07-15 02:48:17.521263
|
2 |
+
2021-07-15 02:48:17,523 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-15 02:48:17,524 INFO WriterThread:649905 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb
|
4 |
+
2021-07-15 02:48:17,524 DEBUG SenderThread:649905 [sender.py:send():179] send: header
|
5 |
+
2021-07-15 02:48:17,525 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-15 02:48:17,561 DEBUG SenderThread:649905 [sender.py:send():179] send: run
|
7 |
+
2021-07-15 02:48:17,732 INFO SenderThread:649905 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files
|
8 |
+
2021-07-15 02:48:17,732 INFO SenderThread:649905 [sender.py:_start_run_threads():716] run started: 39ztwpif with start time 1626317296
|
9 |
+
2021-07-15 02:48:17,732 DEBUG SenderThread:649905 [sender.py:send():179] send: summary
|
10 |
+
2021-07-15 02:48:17,732 INFO SenderThread:649905 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-15 02:48:17,733 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-15 02:48:18,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json
|
13 |
+
2021-07-15 02:48:18,837 DEBUG HandlerThread:649905 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-15 02:48:18,837 DEBUG HandlerThread:649905 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-15 02:48:18,837 DEBUG HandlerThread:649905 [meta.py:probe():210] probe
|
16 |
+
2021-07-15 02:48:18,838 DEBUG HandlerThread:649905 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-15 02:48:18,868 DEBUG HandlerThread:649905 [meta.py:probe():252] probe done
|
21 |
+
2021-07-15 02:48:18,871 DEBUG SenderThread:649905 [sender.py:send():179] send: files
|
22 |
+
2021-07-15 02:48:18,872 INFO SenderThread:649905 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-15 02:48:18,877 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-15 02:48:18,878 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-15 02:48:19,006 DEBUG SenderThread:649905 [sender.py:send():179] send: config
|
26 |
+
2021-07-15 02:48:19,006 DEBUG SenderThread:649905 [sender.py:send():179] send: config
|
27 |
+
2021-07-15 02:48:19,006 DEBUG SenderThread:649905 [sender.py:send():179] send: config
|
28 |
+
2021-07-15 02:48:19,311 INFO Thread-11 :649905 [upload_job.py:push():137] Uploaded file /tmp/tmpgcnix6scwandb/15nx6xdi-wandb-metadata.json
|
29 |
+
2021-07-15 02:48:19,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/requirements.txt
|
30 |
+
2021-07-15 02:48:19,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json
|
31 |
+
2021-07-15 02:48:19,733 INFO Thread-8 :649905 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
|
32 |
+
2021-07-15 02:48:33,738 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
|
33 |
+
2021-07-15 02:48:34,008 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-15 02:48:34,009 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-15 02:48:46,922 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
36 |
+
2021-07-15 02:48:48,744 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml
|
37 |
+
2021-07-15 02:48:49,147 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
38 |
+
2021-07-15 02:48:49,147 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
39 |
+
2021-07-15 02:49:04,279 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
+
2021-07-15 02:49:04,280 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
41 |
+
2021-07-15 02:49:17,003 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
42 |
+
2021-07-15 02:49:19,412 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
43 |
+
2021-07-15 02:49:19,412 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
44 |
+
2021-07-15 02:49:34,543 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
45 |
+
2021-07-15 02:49:34,543 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
46 |
+
2021-07-15 02:49:47,079 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
47 |
+
2021-07-15 02:49:49,677 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
48 |
+
2021-07-15 02:49:49,677 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
49 |
+
2021-07-15 02:50:04,809 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
50 |
+
2021-07-15 02:50:04,809 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
51 |
+
2021-07-15 02:50:17,143 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
52 |
+
2021-07-15 02:50:19,943 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
53 |
+
2021-07-15 02:50:19,943 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
54 |
+
2021-07-15 02:50:35,077 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
55 |
+
2021-07-15 02:50:35,078 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
56 |
+
2021-07-15 02:50:47,219 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
57 |
+
2021-07-15 02:50:50,223 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
58 |
+
2021-07-15 02:50:50,223 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
59 |
+
2021-07-15 02:51:05,389 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
60 |
+
2021-07-15 02:51:05,389 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
61 |
+
2021-07-15 02:51:17,291 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
62 |
+
2021-07-15 02:51:20,521 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
63 |
+
2021-07-15 02:51:20,521 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
64 |
+
2021-07-15 02:51:35,655 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
65 |
+
2021-07-15 02:51:35,655 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
66 |
+
2021-07-15 02:51:47,368 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
67 |
+
2021-07-15 02:51:50,786 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
68 |
+
2021-07-15 02:51:50,786 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
69 |
+
2021-07-15 02:52:05,917 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
70 |
+
2021-07-15 02:52:05,917 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
71 |
+
2021-07-15 02:52:17,445 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
72 |
+
2021-07-15 02:52:21,058 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
73 |
+
2021-07-15 02:52:21,058 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
74 |
+
2021-07-15 02:52:36,188 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
75 |
+
2021-07-15 02:52:36,189 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
76 |
+
2021-07-15 02:52:47,519 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
77 |
+
2021-07-15 02:52:51,318 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
78 |
+
2021-07-15 02:52:51,318 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
79 |
+
2021-07-15 02:53:06,454 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
80 |
+
2021-07-15 02:53:06,454 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
81 |
+
2021-07-15 02:53:17,587 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
82 |
+
2021-07-15 02:53:21,586 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
83 |
+
2021-07-15 02:53:21,586 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
84 |
+
2021-07-15 02:53:36,717 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
85 |
+
2021-07-15 02:53:36,718 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
86 |
+
2021-07-15 02:53:47,654 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
87 |
+
2021-07-15 02:53:51,851 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
88 |
+
2021-07-15 02:53:51,851 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
89 |
+
2021-07-15 02:54:06,983 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
90 |
+
2021-07-15 02:54:06,983 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
91 |
+
2021-07-15 02:54:17,727 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
92 |
+
2021-07-15 02:54:22,115 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
93 |
+
2021-07-15 02:54:22,115 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
94 |
+
2021-07-15 02:54:37,245 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
95 |
+
2021-07-15 02:54:37,246 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
96 |
+
2021-07-15 02:54:47,796 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
97 |
+
2021-07-15 02:54:52,379 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
98 |
+
2021-07-15 02:54:52,379 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
99 |
+
2021-07-15 02:55:07,511 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
100 |
+
2021-07-15 02:55:07,511 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
101 |
+
2021-07-15 02:55:17,864 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
102 |
+
2021-07-15 02:55:22,641 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
103 |
+
2021-07-15 02:55:22,641 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
104 |
+
2021-07-15 02:55:37,785 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
105 |
+
2021-07-15 02:55:37,786 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
106 |
+
2021-07-15 02:55:47,933 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
107 |
+
2021-07-15 02:55:52,928 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
108 |
+
2021-07-15 02:55:52,929 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
109 |
+
2021-07-15 02:56:08,060 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
110 |
+
2021-07-15 02:56:08,060 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
111 |
+
2021-07-15 02:56:18,007 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
112 |
+
2021-07-15 02:56:23,209 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
113 |
+
2021-07-15 02:56:23,210 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
114 |
+
2021-07-15 02:56:23,919 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
|
115 |
+
2021-07-15 02:56:38,372 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
116 |
+
2021-07-15 02:56:38,372 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
117 |
+
2021-07-15 02:56:48,082 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
118 |
+
2021-07-15 02:56:53,514 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
119 |
+
2021-07-15 02:56:53,514 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
120 |
+
2021-07-15 02:57:08,654 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
121 |
+
2021-07-15 02:57:08,655 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
122 |
+
2021-07-15 02:57:18,162 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
123 |
+
2021-07-15 02:57:23,787 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
124 |
+
2021-07-15 02:57:23,787 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
125 |
+
2021-07-15 02:57:38,920 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
126 |
+
2021-07-15 02:57:38,920 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
127 |
+
2021-07-15 02:57:48,241 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
128 |
+
2021-07-15 02:57:54,061 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
129 |
+
2021-07-15 02:57:54,061 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
130 |
+
2021-07-15 02:58:09,194 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
131 |
+
2021-07-15 02:58:09,195 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
132 |
+
2021-07-15 02:58:18,311 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
133 |
+
2021-07-15 02:58:24,331 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: stop_status
|
134 |
+
2021-07-15 02:58:24,331 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: stop_status
|
135 |
+
2021-07-15 02:58:27,972 INFO Thread-8 :649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
|
136 |
+
2021-07-15 02:58:29,408 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
137 |
+
2021-07-15 02:58:29,409 DEBUG SenderThread:649905 [sender.py:send():179] send: telemetry
|
138 |
+
2021-07-15 02:58:29,409 DEBUG SenderThread:649905 [sender.py:send():179] send: exit
|
139 |
+
2021-07-15 02:58:29,409 INFO SenderThread:649905 [sender.py:send_exit():287] handling exit code: 1
|
140 |
+
2021-07-15 02:58:29,411 INFO SenderThread:649905 [sender.py:send_exit():295] send defer
|
141 |
+
2021-07-15 02:58:29,411 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
142 |
+
2021-07-15 02:58:29,412 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
143 |
+
2021-07-15 02:58:29,412 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 0
|
144 |
+
2021-07-15 02:58:29,412 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
145 |
+
2021-07-15 02:58:29,412 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 0
|
146 |
+
2021-07-15 02:58:29,412 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 1
|
147 |
+
2021-07-15 02:58:29,412 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
148 |
+
2021-07-15 02:58:29,413 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 1
|
149 |
+
2021-07-15 02:58:29,440 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
150 |
+
2021-07-15 02:58:29,440 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 1
|
151 |
+
2021-07-15 02:58:29,440 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 2
|
152 |
+
2021-07-15 02:58:29,440 DEBUG SenderThread:649905 [sender.py:send():179] send: stats
|
153 |
+
2021-07-15 02:58:29,441 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
154 |
+
2021-07-15 02:58:29,441 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 2
|
155 |
+
2021-07-15 02:58:29,441 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
156 |
+
2021-07-15 02:58:29,441 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 2
|
157 |
+
2021-07-15 02:58:29,441 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 3
|
158 |
+
2021-07-15 02:58:29,442 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
159 |
+
2021-07-15 02:58:29,442 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 3
|
160 |
+
2021-07-15 02:58:29,442 DEBUG SenderThread:649905 [sender.py:send():179] send: summary
|
161 |
+
2021-07-15 02:58:29,442 INFO SenderThread:649905 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
162 |
+
2021-07-15 02:58:29,443 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
163 |
+
2021-07-15 02:58:29,443 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 3
|
164 |
+
2021-07-15 02:58:29,443 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 4
|
165 |
+
2021-07-15 02:58:29,443 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
166 |
+
2021-07-15 02:58:29,443 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 4
|
167 |
+
2021-07-15 02:58:29,443 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
168 |
+
2021-07-15 02:58:29,443 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 4
|
169 |
+
2021-07-15 02:58:29,513 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
170 |
+
2021-07-15 02:58:29,619 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 5
|
171 |
+
2021-07-15 02:58:29,619 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
172 |
+
2021-07-15 02:58:29,620 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
173 |
+
2021-07-15 02:58:29,620 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 5
|
174 |
+
2021-07-15 02:58:29,620 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
175 |
+
2021-07-15 02:58:29,620 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 5
|
176 |
+
2021-07-15 02:58:29,620 INFO SenderThread:649905 [dir_watcher.py:finish():282] shutting down directory watcher
|
177 |
+
2021-07-15 02:58:29,722 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
178 |
+
2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json
|
179 |
+
2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
|
180 |
+
2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml
|
181 |
+
2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files
|
182 |
+
2021-07-15 02:58:29,974 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/requirements.txt requirements.txt
|
183 |
+
2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log output.log
|
184 |
+
2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-metadata.json wandb-metadata.json
|
185 |
+
2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml config.yaml
|
186 |
+
2021-07-15 02:58:29,975 INFO SenderThread:649905 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json wandb-summary.json
|
187 |
+
2021-07-15 02:58:29,976 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 6
|
188 |
+
2021-07-15 02:58:29,976 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
189 |
+
2021-07-15 02:58:29,977 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
190 |
+
2021-07-15 02:58:29,983 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 6
|
191 |
+
2021-07-15 02:58:29,986 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
192 |
+
2021-07-15 02:58:29,989 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 6
|
193 |
+
2021-07-15 02:58:29,989 INFO SenderThread:649905 [file_pusher.py:finish():177] shutting down file pusher
|
194 |
+
2021-07-15 02:58:30,078 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
195 |
+
2021-07-15 02:58:30,078 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
196 |
+
2021-07-15 02:58:30,181 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
197 |
+
2021-07-15 02:58:30,181 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
198 |
+
2021-07-15 02:58:30,283 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
199 |
+
2021-07-15 02:58:30,283 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
200 |
+
2021-07-15 02:58:30,385 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
201 |
+
2021-07-15 02:58:30,385 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
202 |
+
2021-07-15 02:58:30,418 INFO Thread-13 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/output.log
|
203 |
+
2021-07-15 02:58:30,421 INFO Thread-14 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/config.yaml
|
204 |
+
2021-07-15 02:58:30,423 INFO Thread-12 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/requirements.txt
|
205 |
+
2021-07-15 02:58:30,451 INFO Thread-15 :649905 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/files/wandb-summary.json
|
206 |
+
2021-07-15 02:58:30,487 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
207 |
+
2021-07-15 02:58:30,487 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
208 |
+
2021-07-15 02:58:30,589 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
209 |
+
2021-07-15 02:58:30,589 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
210 |
+
2021-07-15 02:58:30,652 INFO Thread-7 :649905 [sender.py:transition_state():308] send defer: 7
|
211 |
+
2021-07-15 02:58:30,652 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
212 |
+
2021-07-15 02:58:30,653 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 7
|
213 |
+
2021-07-15 02:58:30,653 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
214 |
+
2021-07-15 02:58:30,653 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 7
|
215 |
+
2021-07-15 02:58:30,691 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
216 |
+
2021-07-15 02:58:30,941 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 8
|
217 |
+
2021-07-15 02:58:30,941 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
218 |
+
2021-07-15 02:58:30,941 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
219 |
+
2021-07-15 02:58:30,941 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 8
|
220 |
+
2021-07-15 02:58:30,942 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
221 |
+
2021-07-15 02:58:30,942 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 8
|
222 |
+
2021-07-15 02:58:30,942 INFO SenderThread:649905 [sender.py:transition_state():308] send defer: 9
|
223 |
+
2021-07-15 02:58:30,942 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: defer
|
224 |
+
2021-07-15 02:58:30,942 INFO HandlerThread:649905 [handler.py:handle_request_defer():141] handle defer: 9
|
225 |
+
2021-07-15 02:58:30,942 DEBUG SenderThread:649905 [sender.py:send():179] send: final
|
226 |
+
2021-07-15 02:58:30,943 DEBUG SenderThread:649905 [sender.py:send():179] send: footer
|
227 |
+
2021-07-15 02:58:30,943 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: defer
|
228 |
+
2021-07-15 02:58:30,943 INFO SenderThread:649905 [sender.py:send_request_defer():304] handle sender defer: 9
|
229 |
+
2021-07-15 02:58:31,043 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: poll_exit
|
230 |
+
2021-07-15 02:58:31,043 DEBUG SenderThread:649905 [sender.py:send_request():193] send_request: poll_exit
|
231 |
+
2021-07-15 02:58:31,043 INFO SenderThread:649905 [file_pusher.py:join():182] waiting for file pusher
|
232 |
+
2021-07-15 02:58:31,045 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: get_summary
|
233 |
+
2021-07-15 02:58:31,046 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: sampled_history
|
234 |
+
2021-07-15 02:58:31,046 DEBUG HandlerThread:649905 [handler.py:handle_request():124] handle_request: shutdown
|
235 |
+
2021-07-15 02:58:31,046 INFO HandlerThread:649905 [handler.py:finish():638] shutting down handler
|
236 |
+
2021-07-15 02:58:31,943 INFO WriterThread:649905 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb
|
237 |
+
2021-07-15 02:58:32,044 INFO SenderThread:649905 [sender.py:finish():945] shutting down sender
|
238 |
+
2021-07-15 02:58:32,044 INFO SenderThread:649905 [file_pusher.py:finish():177] shutting down file pusher
|
239 |
+
2021-07-15 02:58:32,044 INFO SenderThread:649905 [file_pusher.py:join():182] waiting for file pusher
|
240 |
+
2021-07-15 02:58:32,047 INFO MainThread:649905 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210715_024816-39ztwpif/logs/debug.log
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/logs/debug.log
|
4 |
+
2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_024816-39ztwpif/logs/debug-internal.log
|
5 |
+
2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-15 02:48:16,826 INFO MainThread:648648 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-15 02:48:16,827 INFO MainThread:648648 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-15 02:48:16,875 INFO MainThread:648648 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-15 02:48:16,922 INFO MainThread:648648 [backend.py:ensure_launched():139] started backend process with pid: 649905
|
12 |
+
2021-07-15 02:48:16,924 INFO MainThread:648648 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-15 02:48:16,927 INFO MainThread:648648 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-15 02:48:16,928 INFO MainThread:648648 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-15 02:48:17,560 INFO MainThread:648648 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-15 02:48:17,560 INFO MainThread:648648 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-15 02:48:17,732 INFO MainThread:648648 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-15 02:48:18,876 INFO MainThread:648648 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-15 02:48:18,877 INFO MainThread:648648 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-15 02:48:18,877 INFO MainThread:648648 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-15 02:48:18,879 INFO MainThread:648648 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-15 02:48:18,879 INFO MainThread:648648 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-15 02:48:18,886 INFO MainThread:648648 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_02-48-08_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 40000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-15 02:48:18,888 INFO MainThread:648648 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
+
2021-07-15 02:48:18,889 INFO MainThread:648648 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
|
26 |
+
2021-07-15 02:58:26,636 INFO MainThread:648648 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-15 02:58:26,637 INFO MainThread:648648 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-15 02:58:29,412 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1375
|
33 |
+
total_bytes: 1375
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-15 02:58:29,620 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1375
|
41 |
+
total_bytes: 1375
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-15 02:58:29,977 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 4
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1375
|
49 |
+
total_bytes: 10904
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-15 02:58:30,079 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1375
|
57 |
+
total_bytes: 10906
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-15 02:58:30,181 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 10906
|
65 |
+
total_bytes: 10906
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-15 02:58:30,284 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 10906
|
73 |
+
total_bytes: 10906
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-15 02:58:30,386 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 10906
|
81 |
+
total_bytes: 10906
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-15 02:58:30,488 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 10906
|
89 |
+
total_bytes: 10906
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-15 02:58:30,590 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 10906
|
97 |
+
total_bytes: 10906
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-15 02:58:30,942 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 10906
|
105 |
+
total_bytes: 10906
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-15 02:58:31,044 INFO MainThread:648648 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
109 |
+
exit_result {
|
110 |
+
}
|
111 |
+
file_counts {
|
112 |
+
wandb_count: 5
|
113 |
+
}
|
114 |
+
pusher_stats {
|
115 |
+
uploaded_bytes: 10906
|
116 |
+
total_bytes: 10906
|
117 |
+
}
|
118 |
+
|
119 |
+
2021-07-15 02:58:32,341 INFO MainThread:648648 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210715_024816-39ztwpif/run-39ztwpif.wandb
ADDED
Binary file (11.6 kB). View file
|
|
wandb/run-20210715_030015-30wihv4o/files/config.yaml
ADDED
@@ -0,0 +1,304 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
2:
|
17 |
+
- 3
|
18 |
+
- 11
|
19 |
+
4: 3.8.10
|
20 |
+
5: 0.10.33
|
21 |
+
6: 4.9.0.dev0
|
22 |
+
8:
|
23 |
+
- 5
|
24 |
+
adafactor:
|
25 |
+
desc: null
|
26 |
+
value: false
|
27 |
+
adam_beta1:
|
28 |
+
desc: null
|
29 |
+
value: 0.9
|
30 |
+
adam_beta2:
|
31 |
+
desc: null
|
32 |
+
value: 0.98
|
33 |
+
adam_epsilon:
|
34 |
+
desc: null
|
35 |
+
value: 1.0e-08
|
36 |
+
cache_dir:
|
37 |
+
desc: null
|
38 |
+
value: null
|
39 |
+
config_name:
|
40 |
+
desc: null
|
41 |
+
value: ./
|
42 |
+
dataloader_drop_last:
|
43 |
+
desc: null
|
44 |
+
value: false
|
45 |
+
dataloader_num_workers:
|
46 |
+
desc: null
|
47 |
+
value: 0
|
48 |
+
dataloader_pin_memory:
|
49 |
+
desc: null
|
50 |
+
value: true
|
51 |
+
dataset_config_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
dataset_name:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
ddp_find_unused_parameters:
|
58 |
+
desc: null
|
59 |
+
value: null
|
60 |
+
debug:
|
61 |
+
desc: null
|
62 |
+
value: []
|
63 |
+
deepspeed:
|
64 |
+
desc: null
|
65 |
+
value: null
|
66 |
+
disable_tqdm:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_eval:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_predict:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
do_train:
|
76 |
+
desc: null
|
77 |
+
value: false
|
78 |
+
dtype:
|
79 |
+
desc: null
|
80 |
+
value: float32
|
81 |
+
eval_accumulation_steps:
|
82 |
+
desc: null
|
83 |
+
value: null
|
84 |
+
eval_steps:
|
85 |
+
desc: null
|
86 |
+
value: 20000
|
87 |
+
evaluation_strategy:
|
88 |
+
desc: null
|
89 |
+
value: IntervalStrategy.NO
|
90 |
+
fp16:
|
91 |
+
desc: null
|
92 |
+
value: false
|
93 |
+
fp16_backend:
|
94 |
+
desc: null
|
95 |
+
value: auto
|
96 |
+
fp16_full_eval:
|
97 |
+
desc: null
|
98 |
+
value: false
|
99 |
+
fp16_opt_level:
|
100 |
+
desc: null
|
101 |
+
value: O1
|
102 |
+
gradient_accumulation_steps:
|
103 |
+
desc: null
|
104 |
+
value: 1
|
105 |
+
greater_is_better:
|
106 |
+
desc: null
|
107 |
+
value: null
|
108 |
+
group_by_length:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
ignore_data_skip:
|
112 |
+
desc: null
|
113 |
+
value: false
|
114 |
+
label_names:
|
115 |
+
desc: null
|
116 |
+
value: null
|
117 |
+
label_smoothing_factor:
|
118 |
+
desc: null
|
119 |
+
value: 0.0
|
120 |
+
learning_rate:
|
121 |
+
desc: null
|
122 |
+
value: 3.0e-05
|
123 |
+
length_column_name:
|
124 |
+
desc: null
|
125 |
+
value: length
|
126 |
+
line_by_line:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
load_best_model_at_end:
|
130 |
+
desc: null
|
131 |
+
value: false
|
132 |
+
local_rank:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_level_replica:
|
139 |
+
desc: null
|
140 |
+
value: -1
|
141 |
+
log_on_each_node:
|
142 |
+
desc: null
|
143 |
+
value: true
|
144 |
+
logging_dir:
|
145 |
+
desc: null
|
146 |
+
value: ./runs/Jul15_03-00-07_t1v-n-f5c06ea1-w-0
|
147 |
+
logging_first_step:
|
148 |
+
desc: null
|
149 |
+
value: false
|
150 |
+
logging_steps:
|
151 |
+
desc: null
|
152 |
+
value: 50
|
153 |
+
logging_strategy:
|
154 |
+
desc: null
|
155 |
+
value: IntervalStrategy.STEPS
|
156 |
+
lr_scheduler_type:
|
157 |
+
desc: null
|
158 |
+
value: SchedulerType.LINEAR
|
159 |
+
max_eval_samples:
|
160 |
+
desc: null
|
161 |
+
value: 500
|
162 |
+
max_grad_norm:
|
163 |
+
desc: null
|
164 |
+
value: 1.0
|
165 |
+
max_seq_length:
|
166 |
+
desc: null
|
167 |
+
value: 4096
|
168 |
+
max_steps:
|
169 |
+
desc: null
|
170 |
+
value: -1
|
171 |
+
metric_for_best_model:
|
172 |
+
desc: null
|
173 |
+
value: null
|
174 |
+
mlm_probability:
|
175 |
+
desc: null
|
176 |
+
value: 0.15
|
177 |
+
model_name_or_path:
|
178 |
+
desc: null
|
179 |
+
value: null
|
180 |
+
model_type:
|
181 |
+
desc: null
|
182 |
+
value: big_bird
|
183 |
+
mp_parameters:
|
184 |
+
desc: null
|
185 |
+
value: ''
|
186 |
+
no_cuda:
|
187 |
+
desc: null
|
188 |
+
value: false
|
189 |
+
num_train_epochs:
|
190 |
+
desc: null
|
191 |
+
value: 5.0
|
192 |
+
output_dir:
|
193 |
+
desc: null
|
194 |
+
value: ./
|
195 |
+
overwrite_cache:
|
196 |
+
desc: null
|
197 |
+
value: false
|
198 |
+
overwrite_output_dir:
|
199 |
+
desc: null
|
200 |
+
value: true
|
201 |
+
pad_to_max_length:
|
202 |
+
desc: null
|
203 |
+
value: false
|
204 |
+
past_index:
|
205 |
+
desc: null
|
206 |
+
value: -1
|
207 |
+
per_device_eval_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_device_train_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: 1
|
213 |
+
per_gpu_eval_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
per_gpu_train_batch_size:
|
217 |
+
desc: null
|
218 |
+
value: null
|
219 |
+
prediction_loss_only:
|
220 |
+
desc: null
|
221 |
+
value: false
|
222 |
+
preprocessing_num_workers:
|
223 |
+
desc: null
|
224 |
+
value: 96
|
225 |
+
push_to_hub:
|
226 |
+
desc: null
|
227 |
+
value: true
|
228 |
+
push_to_hub_model_id:
|
229 |
+
desc: null
|
230 |
+
value: ''
|
231 |
+
push_to_hub_organization:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
push_to_hub_token:
|
235 |
+
desc: null
|
236 |
+
value: null
|
237 |
+
remove_unused_columns:
|
238 |
+
desc: null
|
239 |
+
value: true
|
240 |
+
report_to:
|
241 |
+
desc: null
|
242 |
+
value:
|
243 |
+
- tensorboard
|
244 |
+
- wandb
|
245 |
+
resume_from_checkpoint:
|
246 |
+
desc: null
|
247 |
+
value: null
|
248 |
+
run_name:
|
249 |
+
desc: null
|
250 |
+
value: ./
|
251 |
+
save_on_each_node:
|
252 |
+
desc: null
|
253 |
+
value: false
|
254 |
+
save_steps:
|
255 |
+
desc: null
|
256 |
+
value: 30000
|
257 |
+
save_strategy:
|
258 |
+
desc: null
|
259 |
+
value: IntervalStrategy.STEPS
|
260 |
+
save_total_limit:
|
261 |
+
desc: null
|
262 |
+
value: 5
|
263 |
+
seed:
|
264 |
+
desc: null
|
265 |
+
value: 42
|
266 |
+
sharded_ddp:
|
267 |
+
desc: null
|
268 |
+
value: []
|
269 |
+
skip_memory_metrics:
|
270 |
+
desc: null
|
271 |
+
value: true
|
272 |
+
tokenizer_name:
|
273 |
+
desc: null
|
274 |
+
value: ./
|
275 |
+
tpu_metrics_debug:
|
276 |
+
desc: null
|
277 |
+
value: false
|
278 |
+
tpu_num_cores:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
train_ref_file:
|
282 |
+
desc: null
|
283 |
+
value: null
|
284 |
+
use_fast_tokenizer:
|
285 |
+
desc: null
|
286 |
+
value: true
|
287 |
+
use_legacy_prediction_loop:
|
288 |
+
desc: null
|
289 |
+
value: false
|
290 |
+
validation_ref_file:
|
291 |
+
desc: null
|
292 |
+
value: null
|
293 |
+
validation_split_percentage:
|
294 |
+
desc: null
|
295 |
+
value: 5
|
296 |
+
warmup_ratio:
|
297 |
+
desc: null
|
298 |
+
value: 0.0
|
299 |
+
warmup_steps:
|
300 |
+
desc: null
|
301 |
+
value: 10000
|
302 |
+
weight_decay:
|
303 |
+
desc: null
|
304 |
+
value: 0.0095
|
wandb/run-20210715_030015-30wihv4o/files/output.log
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:386: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code.
|
2 |
+
warnings.warn(
|
3 |
+
/home/dat/pino/lib/python3.8/site-packages/jax/lib/xla_bridge.py:373: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code.
|
4 |
+
warnings.warn(
|
5 |
+
Epoch ... (1/5): 0%| | 0/5 [00:00<?, ?it/s]
|
6 |
+
Training...: 0%| | 0/453557 [02:04<?, ?it/s]
|
7 |
+
Epoch ... (1/5): 0%| | 0/5 [09:10<?, ?it/s]
|
8 |
+
Traceback (most recent call last):
|
9 |
+
File "./run_mlm_flax_no_accum.py", line 699, in <module>
|
10 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
11 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/traceback_util.py", line 183, in reraise_with_filtered_traceback
|
12 |
+
return fun(*args, **kwargs)
|
13 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/_src/api.py", line 1669, in f_pmapped
|
14 |
+
out = pxla.xla_pmap(
|
15 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1620, in bind
|
16 |
+
return call_bind(self, fun, *args, **params)
|
17 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1551, in call_bind
|
18 |
+
outs = primitive.process(top_trace, fun, tracers, params)
|
19 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 1623, in process
|
20 |
+
return trace.process_map(self, fun, tracers, params)
|
21 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/core.py", line 606, in process_call
|
22 |
+
return primitive.impl(f, *tracers, **params)
|
23 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 637, in xla_pmap_impl
|
24 |
+
return compiled_fun(*args)
|
25 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
26 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
27 |
+
jax._src.traceback_util.UnfilteredStackTrace: RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 7.12G free, 0B reserved, and 7.08G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
28 |
+
The stack trace below excludes JAX-internal frames.
|
29 |
+
The preceding is the original exception that occurred, unmodified.
|
30 |
+
--------------------
|
31 |
+
The above exception was the direct cause of the following exception:
|
32 |
+
Traceback (most recent call last):
|
33 |
+
File "./run_mlm_flax_no_accum.py", line 699, in <module>
|
34 |
+
state, train_metric, dropout_rngs = p_train_step(state, model_inputs, dropout_rngs)
|
35 |
+
File "/home/dat/pino/lib/python3.8/site-packages/jax/interpreters/pxla.py", line 1159, in execute_replicated
|
36 |
+
out_bufs = compiled.execute_sharded_on_local_devices(input_bufs)
|
37 |
+
RuntimeError: Resource exhausted: Attempting to reserve 7.59G at the bottom of memory. That was not possible. There are 7.12G free, 0B reserved, and 7.08G reservable.: while running replica 0 and partition 0 of a replicated computation (other replicas may have failed as well).
|
wandb/run-20210715_030015-30wihv4o/files/requirements.txt
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
absl-py==0.13.0
|
2 |
+
aiohttp==3.7.4.post0
|
3 |
+
astunparse==1.6.3
|
4 |
+
async-timeout==3.0.1
|
5 |
+
attrs==21.2.0
|
6 |
+
cachetools==4.2.2
|
7 |
+
certifi==2021.5.30
|
8 |
+
chardet==4.0.0
|
9 |
+
charset-normalizer==2.0.1
|
10 |
+
chex==0.0.8
|
11 |
+
click==8.0.1
|
12 |
+
configparser==5.0.2
|
13 |
+
cycler==0.10.0
|
14 |
+
datasets==1.9.1.dev0
|
15 |
+
dill==0.3.4
|
16 |
+
dm-tree==0.1.6
|
17 |
+
docker-pycreds==0.4.0
|
18 |
+
filelock==3.0.12
|
19 |
+
flatbuffers==1.12
|
20 |
+
flax==0.3.4
|
21 |
+
fsspec==2021.7.0
|
22 |
+
gast==0.4.0
|
23 |
+
gitdb==4.0.7
|
24 |
+
gitpython==3.1.18
|
25 |
+
google-auth-oauthlib==0.4.4
|
26 |
+
google-auth==1.32.1
|
27 |
+
google-pasta==0.2.0
|
28 |
+
grpcio==1.34.1
|
29 |
+
h5py==3.1.0
|
30 |
+
huggingface-hub==0.0.12
|
31 |
+
idna==3.2
|
32 |
+
install==1.3.4
|
33 |
+
jax==0.2.17
|
34 |
+
jaxlib==0.1.68
|
35 |
+
joblib==1.0.1
|
36 |
+
keras-nightly==2.5.0.dev2021032900
|
37 |
+
keras-preprocessing==1.1.2
|
38 |
+
kiwisolver==1.3.1
|
39 |
+
libtpu-nightly==0.1.dev20210615
|
40 |
+
markdown==3.3.4
|
41 |
+
matplotlib==3.4.2
|
42 |
+
msgpack==1.0.2
|
43 |
+
multidict==5.1.0
|
44 |
+
multiprocess==0.70.12.2
|
45 |
+
numpy==1.19.5
|
46 |
+
oauthlib==3.1.1
|
47 |
+
opt-einsum==3.3.0
|
48 |
+
optax==0.0.9
|
49 |
+
packaging==21.0
|
50 |
+
pandas==1.3.0
|
51 |
+
pathtools==0.1.2
|
52 |
+
pillow==8.3.1
|
53 |
+
pip==20.0.2
|
54 |
+
pkg-resources==0.0.0
|
55 |
+
promise==2.3
|
56 |
+
protobuf==3.17.3
|
57 |
+
psutil==5.8.0
|
58 |
+
pyarrow==4.0.1
|
59 |
+
pyasn1-modules==0.2.8
|
60 |
+
pyasn1==0.4.8
|
61 |
+
pyparsing==2.4.7
|
62 |
+
python-dateutil==2.8.1
|
63 |
+
pytz==2021.1
|
64 |
+
pyyaml==5.4.1
|
65 |
+
regex==2021.7.6
|
66 |
+
requests-oauthlib==1.3.0
|
67 |
+
requests==2.26.0
|
68 |
+
rsa==4.7.2
|
69 |
+
sacremoses==0.0.45
|
70 |
+
scipy==1.7.0
|
71 |
+
sentry-sdk==1.3.0
|
72 |
+
setuptools==44.0.0
|
73 |
+
shortuuid==1.0.1
|
74 |
+
six==1.15.0
|
75 |
+
smmap==4.0.0
|
76 |
+
subprocess32==3.5.4
|
77 |
+
tensorboard-data-server==0.6.1
|
78 |
+
tensorboard-plugin-wit==1.8.0
|
79 |
+
tensorboard==2.5.0
|
80 |
+
tensorflow-estimator==2.5.0
|
81 |
+
tensorflow==2.5.0
|
82 |
+
termcolor==1.1.0
|
83 |
+
tokenizers==0.10.3
|
84 |
+
toolz==0.11.1
|
85 |
+
tqdm==4.61.2
|
86 |
+
transformers==4.9.0.dev0
|
87 |
+
typing-extensions==3.7.4.3
|
88 |
+
urllib3==1.26.6
|
89 |
+
wandb==0.10.33
|
90 |
+
werkzeug==2.0.1
|
91 |
+
wheel==0.36.2
|
92 |
+
wrapt==1.12.1
|
93 |
+
xxhash==2.0.2
|
94 |
+
yarl==1.6.3
|
wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-5.4.0-1043-gcp-x86_64-with-glibc2.29",
|
3 |
+
"python": "3.8.10",
|
4 |
+
"heartbeatAt": "2021-07-15T03:00:17.537660",
|
5 |
+
"startedAt": "2021-07-15T03:00:15.443682",
|
6 |
+
"docker": null,
|
7 |
+
"cpu_count": 96,
|
8 |
+
"cuda": null,
|
9 |
+
"args": [
|
10 |
+
"--push_to_hub",
|
11 |
+
"--output_dir=./",
|
12 |
+
"--model_type=big_bird",
|
13 |
+
"--config_name=./",
|
14 |
+
"--tokenizer_name=./",
|
15 |
+
"--max_seq_length=4096",
|
16 |
+
"--weight_decay=0.0095",
|
17 |
+
"--warmup_steps=10000",
|
18 |
+
"--overwrite_output_dir",
|
19 |
+
"--adam_beta1=0.9",
|
20 |
+
"--adam_beta2=0.98",
|
21 |
+
"--logging_steps=50",
|
22 |
+
"--eval_steps=20000",
|
23 |
+
"--num_train_epochs=5",
|
24 |
+
"--preprocessing_num_workers=96",
|
25 |
+
"--save_steps=30000",
|
26 |
+
"--learning_rate=3e-5",
|
27 |
+
"--per_device_train_batch_size=1",
|
28 |
+
"--per_device_eval_batch_size=1",
|
29 |
+
"--save_total_limit=5",
|
30 |
+
"--max_eval_samples=500"
|
31 |
+
],
|
32 |
+
"state": "running",
|
33 |
+
"program": "./run_mlm_flax_no_accum.py",
|
34 |
+
"codePath": "run_mlm_flax_no_accum.py",
|
35 |
+
"git": {
|
36 |
+
"remote": "https://huggingface.co/flax-community/pino-roberta-base",
|
37 |
+
"commit": "87e02e7ff8fbaea90c8c4ad1c984f83742432303"
|
38 |
+
},
|
39 |
+
"email": null,
|
40 |
+
"root": "/home/dat/pino-roberta-base",
|
41 |
+
"host": "t1v-n-f5c06ea1-w-0",
|
42 |
+
"username": "dat",
|
43 |
+
"executable": "/home/dat/pino/bin/python"
|
44 |
+
}
|
wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log
ADDED
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 03:00:16,154 INFO MainThread:652382 [internal.py:wandb_internal():88] W&B internal server running at pid: 652382, started at: 2021-07-15 03:00:16.153819
|
2 |
+
2021-07-15 03:00:16,156 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: check_version
|
3 |
+
2021-07-15 03:00:16,156 INFO WriterThread:652382 [datastore.py:open_for_write():80] open: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb
|
4 |
+
2021-07-15 03:00:16,157 DEBUG SenderThread:652382 [sender.py:send():179] send: header
|
5 |
+
2021-07-15 03:00:16,157 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: check_version
|
6 |
+
2021-07-15 03:00:16,194 DEBUG SenderThread:652382 [sender.py:send():179] send: run
|
7 |
+
2021-07-15 03:00:16,370 INFO SenderThread:652382 [dir_watcher.py:__init__():168] watching files in: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files
|
8 |
+
2021-07-15 03:00:16,370 INFO SenderThread:652382 [sender.py:_start_run_threads():716] run started: 30wihv4o with start time 1626318015
|
9 |
+
2021-07-15 03:00:16,372 DEBUG SenderThread:652382 [sender.py:send():179] send: summary
|
10 |
+
2021-07-15 03:00:16,373 INFO SenderThread:652382 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
11 |
+
2021-07-15 03:00:16,374 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: run_start
|
12 |
+
2021-07-15 03:00:17,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json
|
13 |
+
2021-07-15 03:00:17,537 DEBUG HandlerThread:652382 [meta.py:__init__():39] meta init
|
14 |
+
2021-07-15 03:00:17,537 DEBUG HandlerThread:652382 [meta.py:__init__():53] meta init done
|
15 |
+
2021-07-15 03:00:17,537 DEBUG HandlerThread:652382 [meta.py:probe():210] probe
|
16 |
+
2021-07-15 03:00:17,538 DEBUG HandlerThread:652382 [meta.py:_setup_git():200] setup git
|
17 |
+
2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:_setup_git():207] setup git done
|
18 |
+
2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:_save_pip():57] save pip
|
19 |
+
2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:_save_pip():71] save pip done
|
20 |
+
2021-07-15 03:00:17,568 DEBUG HandlerThread:652382 [meta.py:probe():252] probe done
|
21 |
+
2021-07-15 03:00:17,572 DEBUG SenderThread:652382 [sender.py:send():179] send: files
|
22 |
+
2021-07-15 03:00:17,572 INFO SenderThread:652382 [sender.py:_save_file():841] saving file wandb-metadata.json with policy now
|
23 |
+
2021-07-15 03:00:17,579 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
24 |
+
2021-07-15 03:00:17,580 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
25 |
+
2021-07-15 03:00:17,710 DEBUG SenderThread:652382 [sender.py:send():179] send: config
|
26 |
+
2021-07-15 03:00:17,710 DEBUG SenderThread:652382 [sender.py:send():179] send: config
|
27 |
+
2021-07-15 03:00:17,711 DEBUG SenderThread:652382 [sender.py:send():179] send: config
|
28 |
+
2021-07-15 03:00:18,067 INFO Thread-11 :652382 [upload_job.py:push():137] Uploaded file /tmp/tmpo5adho61wandb/1x3gq8av-wandb-metadata.json
|
29 |
+
2021-07-15 03:00:18,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/requirements.txt
|
30 |
+
2021-07-15 03:00:18,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json
|
31 |
+
2021-07-15 03:00:18,425 INFO Thread-8 :652382 [dir_watcher.py:_on_file_created():216] file/dir created: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
|
32 |
+
2021-07-15 03:00:32,431 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
|
33 |
+
2021-07-15 03:00:32,712 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
34 |
+
2021-07-15 03:00:32,712 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
35 |
+
2021-07-15 03:00:45,621 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
36 |
+
2021-07-15 03:00:47,437 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml
|
37 |
+
2021-07-15 03:00:47,844 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
38 |
+
2021-07-15 03:00:47,844 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
39 |
+
2021-07-15 03:01:02,976 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
40 |
+
2021-07-15 03:01:02,976 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
41 |
+
2021-07-15 03:01:15,700 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
42 |
+
2021-07-15 03:01:18,116 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
43 |
+
2021-07-15 03:01:18,116 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
44 |
+
2021-07-15 03:01:33,254 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
45 |
+
2021-07-15 03:01:33,255 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
46 |
+
2021-07-15 03:01:45,777 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
47 |
+
2021-07-15 03:01:48,387 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
48 |
+
2021-07-15 03:01:48,388 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
49 |
+
2021-07-15 03:02:03,521 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
50 |
+
2021-07-15 03:02:03,522 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
51 |
+
2021-07-15 03:02:15,842 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
52 |
+
2021-07-15 03:02:18,653 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
53 |
+
2021-07-15 03:02:18,654 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
54 |
+
2021-07-15 03:02:33,785 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
55 |
+
2021-07-15 03:02:33,785 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
56 |
+
2021-07-15 03:02:45,911 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
57 |
+
2021-07-15 03:02:48,915 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
58 |
+
2021-07-15 03:02:48,915 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
59 |
+
2021-07-15 03:03:04,048 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
60 |
+
2021-07-15 03:03:04,049 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
61 |
+
2021-07-15 03:03:15,978 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
62 |
+
2021-07-15 03:03:19,181 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
63 |
+
2021-07-15 03:03:19,181 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
64 |
+
2021-07-15 03:03:34,314 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
65 |
+
2021-07-15 03:03:34,314 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
66 |
+
2021-07-15 03:03:46,043 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
67 |
+
2021-07-15 03:03:49,447 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
68 |
+
2021-07-15 03:03:49,448 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
69 |
+
2021-07-15 03:04:04,580 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
70 |
+
2021-07-15 03:04:04,580 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
71 |
+
2021-07-15 03:04:16,110 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
72 |
+
2021-07-15 03:04:19,717 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
73 |
+
2021-07-15 03:04:19,718 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
74 |
+
2021-07-15 03:04:34,849 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
75 |
+
2021-07-15 03:04:34,849 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
76 |
+
2021-07-15 03:04:46,173 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
77 |
+
2021-07-15 03:04:49,981 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
78 |
+
2021-07-15 03:04:49,982 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
79 |
+
2021-07-15 03:05:05,119 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
80 |
+
2021-07-15 03:05:05,120 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
81 |
+
2021-07-15 03:05:16,239 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
82 |
+
2021-07-15 03:05:20,263 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
83 |
+
2021-07-15 03:05:20,264 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
84 |
+
2021-07-15 03:05:35,395 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
85 |
+
2021-07-15 03:05:35,395 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
86 |
+
2021-07-15 03:05:46,312 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
87 |
+
2021-07-15 03:05:50,529 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
88 |
+
2021-07-15 03:05:50,529 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
89 |
+
2021-07-15 03:06:05,662 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
90 |
+
2021-07-15 03:06:05,662 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
91 |
+
2021-07-15 03:06:16,385 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
92 |
+
2021-07-15 03:06:20,794 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
93 |
+
2021-07-15 03:06:20,794 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
94 |
+
2021-07-15 03:06:35,926 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
95 |
+
2021-07-15 03:06:35,926 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
96 |
+
2021-07-15 03:06:46,454 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
97 |
+
2021-07-15 03:06:51,060 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
98 |
+
2021-07-15 03:06:51,060 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
99 |
+
2021-07-15 03:07:06,201 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
100 |
+
2021-07-15 03:07:06,201 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
101 |
+
2021-07-15 03:07:16,531 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
102 |
+
2021-07-15 03:07:21,340 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
103 |
+
2021-07-15 03:07:21,340 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
104 |
+
2021-07-15 03:07:36,473 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
105 |
+
2021-07-15 03:07:36,473 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
106 |
+
2021-07-15 03:07:38,595 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
|
107 |
+
2021-07-15 03:07:46,605 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
108 |
+
2021-07-15 03:07:51,620 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
109 |
+
2021-07-15 03:07:51,620 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
110 |
+
2021-07-15 03:08:06,767 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
111 |
+
2021-07-15 03:08:06,768 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
112 |
+
2021-07-15 03:08:16,682 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
113 |
+
2021-07-15 03:08:21,898 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
114 |
+
2021-07-15 03:08:21,899 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
115 |
+
2021-07-15 03:08:37,032 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
116 |
+
2021-07-15 03:08:37,032 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
117 |
+
2021-07-15 03:08:46,763 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
118 |
+
2021-07-15 03:08:52,171 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
119 |
+
2021-07-15 03:08:52,172 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
120 |
+
2021-07-15 03:09:07,305 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
121 |
+
2021-07-15 03:09:07,305 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
122 |
+
2021-07-15 03:09:16,837 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
123 |
+
2021-07-15 03:09:22,440 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
124 |
+
2021-07-15 03:09:22,440 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
125 |
+
2021-07-15 03:09:37,575 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: stop_status
|
126 |
+
2021-07-15 03:09:37,576 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: stop_status
|
127 |
+
2021-07-15 03:09:42,648 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
|
128 |
+
2021-07-15 03:09:44,217 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
129 |
+
2021-07-15 03:09:44,217 DEBUG SenderThread:652382 [sender.py:send():179] send: telemetry
|
130 |
+
2021-07-15 03:09:44,218 DEBUG SenderThread:652382 [sender.py:send():179] send: exit
|
131 |
+
2021-07-15 03:09:44,218 INFO SenderThread:652382 [sender.py:send_exit():287] handling exit code: 1
|
132 |
+
2021-07-15 03:09:44,219 INFO SenderThread:652382 [sender.py:send_exit():295] send defer
|
133 |
+
2021-07-15 03:09:44,219 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
134 |
+
2021-07-15 03:09:44,220 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
135 |
+
2021-07-15 03:09:44,220 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 0
|
136 |
+
2021-07-15 03:09:44,221 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
137 |
+
2021-07-15 03:09:44,221 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 0
|
138 |
+
2021-07-15 03:09:44,221 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 1
|
139 |
+
2021-07-15 03:09:44,221 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
140 |
+
2021-07-15 03:09:44,221 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 1
|
141 |
+
2021-07-15 03:09:44,308 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
142 |
+
2021-07-15 03:09:44,308 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 1
|
143 |
+
2021-07-15 03:09:44,308 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 2
|
144 |
+
2021-07-15 03:09:44,308 DEBUG SenderThread:652382 [sender.py:send():179] send: stats
|
145 |
+
2021-07-15 03:09:44,309 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
146 |
+
2021-07-15 03:09:44,309 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 2
|
147 |
+
2021-07-15 03:09:44,309 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
148 |
+
2021-07-15 03:09:44,309 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 2
|
149 |
+
2021-07-15 03:09:44,309 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 3
|
150 |
+
2021-07-15 03:09:44,309 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
151 |
+
2021-07-15 03:09:44,310 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 3
|
152 |
+
2021-07-15 03:09:44,310 DEBUG SenderThread:652382 [sender.py:send():179] send: summary
|
153 |
+
2021-07-15 03:09:44,311 INFO SenderThread:652382 [sender.py:_save_file():841] saving file wandb-summary.json with policy end
|
154 |
+
2021-07-15 03:09:44,311 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
155 |
+
2021-07-15 03:09:44,311 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 3
|
156 |
+
2021-07-15 03:09:44,311 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 4
|
157 |
+
2021-07-15 03:09:44,311 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
158 |
+
2021-07-15 03:09:44,312 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 4
|
159 |
+
2021-07-15 03:09:44,312 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
160 |
+
2021-07-15 03:09:44,312 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 4
|
161 |
+
2021-07-15 03:09:44,322 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
162 |
+
2021-07-15 03:09:44,490 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 5
|
163 |
+
2021-07-15 03:09:44,490 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
164 |
+
2021-07-15 03:09:44,491 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
165 |
+
2021-07-15 03:09:44,491 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 5
|
166 |
+
2021-07-15 03:09:44,491 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
167 |
+
2021-07-15 03:09:44,491 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 5
|
168 |
+
2021-07-15 03:09:44,491 INFO SenderThread:652382 [dir_watcher.py:finish():282] shutting down directory watcher
|
169 |
+
2021-07-15 03:09:44,592 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
170 |
+
2021-07-15 03:09:44,649 INFO Thread-8 :652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json
|
171 |
+
2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml
|
172 |
+
2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:_on_file_modified():229] file/dir modified: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
|
173 |
+
2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():312] scan: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files
|
174 |
+
2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/requirements.txt requirements.txt
|
175 |
+
2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log output.log
|
176 |
+
2021-07-15 03:09:44,650 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-metadata.json wandb-metadata.json
|
177 |
+
2021-07-15 03:09:44,651 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml config.yaml
|
178 |
+
2021-07-15 03:09:44,651 INFO SenderThread:652382 [dir_watcher.py:finish():318] scan save: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json wandb-summary.json
|
179 |
+
2021-07-15 03:09:44,651 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 6
|
180 |
+
2021-07-15 03:09:44,651 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
181 |
+
2021-07-15 03:09:44,652 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
182 |
+
2021-07-15 03:09:44,652 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 6
|
183 |
+
2021-07-15 03:09:44,655 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
184 |
+
2021-07-15 03:09:44,655 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 6
|
185 |
+
2021-07-15 03:09:44,655 INFO SenderThread:652382 [file_pusher.py:finish():177] shutting down file pusher
|
186 |
+
2021-07-15 03:09:44,754 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
187 |
+
2021-07-15 03:09:44,754 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
188 |
+
2021-07-15 03:09:44,856 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
189 |
+
2021-07-15 03:09:44,856 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
190 |
+
2021-07-15 03:09:44,958 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
191 |
+
2021-07-15 03:09:44,958 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
192 |
+
2021-07-15 03:09:45,060 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
193 |
+
2021-07-15 03:09:45,060 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
194 |
+
2021-07-15 03:09:45,085 INFO Thread-14 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/config.yaml
|
195 |
+
2021-07-15 03:09:45,094 INFO Thread-12 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/requirements.txt
|
196 |
+
2021-07-15 03:09:45,129 INFO Thread-15 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/wandb-summary.json
|
197 |
+
2021-07-15 03:09:45,144 INFO Thread-13 :652382 [upload_job.py:push():137] Uploaded file /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/files/output.log
|
198 |
+
2021-07-15 03:09:45,162 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
199 |
+
2021-07-15 03:09:45,162 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
200 |
+
2021-07-15 03:09:45,264 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
201 |
+
2021-07-15 03:09:45,265 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
202 |
+
2021-07-15 03:09:45,345 INFO Thread-7 :652382 [sender.py:transition_state():308] send defer: 7
|
203 |
+
2021-07-15 03:09:45,345 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
204 |
+
2021-07-15 03:09:45,345 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 7
|
205 |
+
2021-07-15 03:09:45,346 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
206 |
+
2021-07-15 03:09:45,346 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 7
|
207 |
+
2021-07-15 03:09:45,366 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
208 |
+
2021-07-15 03:09:45,636 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 8
|
209 |
+
2021-07-15 03:09:45,636 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
210 |
+
2021-07-15 03:09:45,637 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
211 |
+
2021-07-15 03:09:45,637 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 8
|
212 |
+
2021-07-15 03:09:45,637 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
213 |
+
2021-07-15 03:09:45,637 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 8
|
214 |
+
2021-07-15 03:09:45,638 INFO SenderThread:652382 [sender.py:transition_state():308] send defer: 9
|
215 |
+
2021-07-15 03:09:45,638 DEBUG SenderThread:652382 [sender.py:send():179] send: final
|
216 |
+
2021-07-15 03:09:45,638 DEBUG SenderThread:652382 [sender.py:send():179] send: footer
|
217 |
+
2021-07-15 03:09:45,639 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: defer
|
218 |
+
2021-07-15 03:09:45,639 INFO HandlerThread:652382 [handler.py:handle_request_defer():141] handle defer: 9
|
219 |
+
2021-07-15 03:09:45,639 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: defer
|
220 |
+
2021-07-15 03:09:45,639 INFO SenderThread:652382 [sender.py:send_request_defer():304] handle sender defer: 9
|
221 |
+
2021-07-15 03:09:45,738 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: poll_exit
|
222 |
+
2021-07-15 03:09:45,739 DEBUG SenderThread:652382 [sender.py:send_request():193] send_request: poll_exit
|
223 |
+
2021-07-15 03:09:45,739 INFO SenderThread:652382 [file_pusher.py:join():182] waiting for file pusher
|
224 |
+
2021-07-15 03:09:45,740 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: get_summary
|
225 |
+
2021-07-15 03:09:45,741 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: sampled_history
|
226 |
+
2021-07-15 03:09:45,742 DEBUG HandlerThread:652382 [handler.py:handle_request():124] handle_request: shutdown
|
227 |
+
2021-07-15 03:09:45,742 INFO HandlerThread:652382 [handler.py:finish():638] shutting down handler
|
228 |
+
2021-07-15 03:09:46,639 INFO WriterThread:652382 [datastore.py:close():288] close: /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb
|
229 |
+
2021-07-15 03:09:46,739 INFO SenderThread:652382 [sender.py:finish():945] shutting down sender
|
230 |
+
2021-07-15 03:09:46,739 INFO SenderThread:652382 [file_pusher.py:finish():177] shutting down file pusher
|
231 |
+
2021-07-15 03:09:46,739 INFO SenderThread:652382 [file_pusher.py:join():182] waiting for file pusher
|
232 |
+
2021-07-15 03:09:46,742 INFO MainThread:652382 [internal.py:handle_exit():78] Internal process exited
|
wandb/run-20210715_030015-30wihv4o/logs/debug.log
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_setup.py:_flush():69] setting env: {}
|
2 |
+
2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_setup.py:_flush():69] setting login settings: {}
|
3 |
+
2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:_log_setup():337] Logging user logs to /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/logs/debug.log
|
4 |
+
2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:_log_setup():338] Logging internal logs to /home/dat/pino-roberta-base/wandb/run-20210715_030015-30wihv4o/logs/debug-internal.log
|
5 |
+
2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:init():370] calling init triggers
|
6 |
+
2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:init():375] wandb.init called with sweep_config: {}
|
7 |
+
config: {}
|
8 |
+
2021-07-15 03:00:15,445 INFO MainThread:651126 [wandb_init.py:init():419] starting backend
|
9 |
+
2021-07-15 03:00:15,446 INFO MainThread:651126 [backend.py:_multiprocessing_setup():70] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
10 |
+
2021-07-15 03:00:15,492 INFO MainThread:651126 [backend.py:ensure_launched():135] starting backend process...
|
11 |
+
2021-07-15 03:00:15,539 INFO MainThread:651126 [backend.py:ensure_launched():139] started backend process with pid: 652382
|
12 |
+
2021-07-15 03:00:15,541 INFO MainThread:651126 [wandb_init.py:init():424] backend started and connected
|
13 |
+
2021-07-15 03:00:15,544 INFO MainThread:651126 [wandb_init.py:init():472] updated telemetry
|
14 |
+
2021-07-15 03:00:15,545 INFO MainThread:651126 [wandb_init.py:init():491] communicating current version
|
15 |
+
2021-07-15 03:00:16,193 INFO MainThread:651126 [wandb_init.py:init():496] got version response
|
16 |
+
2021-07-15 03:00:16,193 INFO MainThread:651126 [wandb_init.py:init():504] communicating run to backend with 30 second timeout
|
17 |
+
2021-07-15 03:00:16,373 INFO MainThread:651126 [wandb_init.py:init():529] starting run threads in backend
|
18 |
+
2021-07-15 03:00:17,575 INFO MainThread:651126 [wandb_run.py:_console_start():1623] atexit reg
|
19 |
+
2021-07-15 03:00:17,575 INFO MainThread:651126 [wandb_run.py:_redirect():1497] redirect: SettingsConsole.REDIRECT
|
20 |
+
2021-07-15 03:00:17,576 INFO MainThread:651126 [wandb_run.py:_redirect():1502] Redirecting console.
|
21 |
+
2021-07-15 03:00:17,577 INFO MainThread:651126 [wandb_run.py:_redirect():1558] Redirects installed.
|
22 |
+
2021-07-15 03:00:17,578 INFO MainThread:651126 [wandb_init.py:init():554] run started, returning control to user process
|
23 |
+
2021-07-15 03:00:17,583 INFO MainThread:651126 [wandb_run.py:_config_callback():872] config_cb None None {'output_dir': './', 'overwrite_output_dir': True, 'do_train': False, 'do_eval': False, 'do_predict': False, 'evaluation_strategy': 'IntervalStrategy.NO', 'prediction_loss_only': False, 'per_device_train_batch_size': 1, 'per_device_eval_batch_size': 1, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 1, 'eval_accumulation_steps': None, 'learning_rate': 3e-05, 'weight_decay': 0.0095, 'adam_beta1': 0.9, 'adam_beta2': 0.98, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5.0, 'max_steps': -1, 'lr_scheduler_type': 'SchedulerType.LINEAR', 'warmup_ratio': 0.0, 'warmup_steps': 10000, 'log_level': -1, 'log_level_replica': -1, 'log_on_each_node': True, 'logging_dir': './runs/Jul15_03-00-07_t1v-n-f5c06ea1-w-0', 'logging_strategy': 'IntervalStrategy.STEPS', 'logging_first_step': False, 'logging_steps': 50, 'save_strategy': 'IntervalStrategy.STEPS', 'save_steps': 30000, 'save_total_limit': 5, 'save_on_each_node': False, 'no_cuda': False, 'seed': 42, 'fp16': False, 'fp16_opt_level': 'O1', 'fp16_backend': 'auto', 'fp16_full_eval': False, 'local_rank': -1, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 20000, 'dataloader_num_workers': 0, 'past_index': -1, 'run_name': './', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'sharded_ddp': [], 'deepspeed': None, 'label_smoothing_factor': 0.0, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['tensorboard', 'wandb'], 'ddp_find_unused_parameters': None, 'dataloader_pin_memory': True, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'push_to_hub_model_id': 
'', 'push_to_hub_organization': None, 'push_to_hub_token': None, 'mp_parameters': ''}
|
24 |
+
2021-07-15 03:00:17,585 INFO MainThread:651126 [wandb_run.py:_config_callback():872] config_cb None None {'model_name_or_path': None, 'model_type': 'big_bird', 'config_name': './', 'tokenizer_name': './', 'cache_dir': None, 'use_fast_tokenizer': True, 'dtype': 'float32'}
|
25 |
+
2021-07-15 03:00:17,586 INFO MainThread:651126 [wandb_run.py:_config_callback():872] config_cb None None {'dataset_name': None, 'dataset_config_name': None, 'train_ref_file': None, 'validation_ref_file': None, 'overwrite_cache': False, 'validation_split_percentage': 5, 'max_seq_length': 4096, 'preprocessing_num_workers': 96, 'mlm_probability': 0.15, 'pad_to_max_length': False, 'line_by_line': False, 'max_eval_samples': 500}
|
26 |
+
2021-07-15 03:09:41,805 INFO MainThread:651126 [wandb_run.py:_atexit_cleanup():1593] got exitcode: 1
|
27 |
+
2021-07-15 03:09:41,806 INFO MainThread:651126 [wandb_run.py:_restore():1565] restore
|
28 |
+
2021-07-15 03:09:44,221 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
29 |
+
wandb_count: 1
|
30 |
+
}
|
31 |
+
pusher_stats {
|
32 |
+
uploaded_bytes: 1375
|
33 |
+
total_bytes: 1375
|
34 |
+
}
|
35 |
+
|
36 |
+
2021-07-15 03:09:44,491 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
37 |
+
wandb_count: 1
|
38 |
+
}
|
39 |
+
pusher_stats {
|
40 |
+
uploaded_bytes: 1375
|
41 |
+
total_bytes: 1375
|
42 |
+
}
|
43 |
+
|
44 |
+
2021-07-15 03:09:44,652 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
45 |
+
wandb_count: 4
|
46 |
+
}
|
47 |
+
pusher_stats {
|
48 |
+
uploaded_bytes: 1375
|
49 |
+
total_bytes: 10904
|
50 |
+
}
|
51 |
+
|
52 |
+
2021-07-15 03:09:44,755 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
53 |
+
wandb_count: 5
|
54 |
+
}
|
55 |
+
pusher_stats {
|
56 |
+
uploaded_bytes: 1375
|
57 |
+
total_bytes: 10906
|
58 |
+
}
|
59 |
+
|
60 |
+
2021-07-15 03:09:44,857 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
61 |
+
wandb_count: 5
|
62 |
+
}
|
63 |
+
pusher_stats {
|
64 |
+
uploaded_bytes: 10906
|
65 |
+
total_bytes: 10906
|
66 |
+
}
|
67 |
+
|
68 |
+
2021-07-15 03:09:44,959 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
69 |
+
wandb_count: 5
|
70 |
+
}
|
71 |
+
pusher_stats {
|
72 |
+
uploaded_bytes: 10906
|
73 |
+
total_bytes: 10906
|
74 |
+
}
|
75 |
+
|
76 |
+
2021-07-15 03:09:45,061 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
77 |
+
wandb_count: 5
|
78 |
+
}
|
79 |
+
pusher_stats {
|
80 |
+
uploaded_bytes: 10906
|
81 |
+
total_bytes: 10906
|
82 |
+
}
|
83 |
+
|
84 |
+
2021-07-15 03:09:45,163 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
85 |
+
wandb_count: 5
|
86 |
+
}
|
87 |
+
pusher_stats {
|
88 |
+
uploaded_bytes: 10906
|
89 |
+
total_bytes: 10906
|
90 |
+
}
|
91 |
+
|
92 |
+
2021-07-15 03:09:45,265 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
93 |
+
wandb_count: 5
|
94 |
+
}
|
95 |
+
pusher_stats {
|
96 |
+
uploaded_bytes: 10906
|
97 |
+
total_bytes: 10906
|
98 |
+
}
|
99 |
+
|
100 |
+
2021-07-15 03:09:45,637 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: file_counts {
|
101 |
+
wandb_count: 5
|
102 |
+
}
|
103 |
+
pusher_stats {
|
104 |
+
uploaded_bytes: 10906
|
105 |
+
total_bytes: 10906
|
106 |
+
}
|
107 |
+
|
108 |
+
2021-07-15 03:09:45,739 INFO MainThread:651126 [wandb_run.py:_wait_for_finish():1715] got exit ret: done: true
|
109 |
+
exit_result {
|
110 |
+
}
|
111 |
+
file_counts {
|
112 |
+
wandb_count: 5
|
113 |
+
}
|
114 |
+
pusher_stats {
|
115 |
+
uploaded_bytes: 10906
|
116 |
+
total_bytes: 10906
|
117 |
+
}
|
118 |
+
|
119 |
+
2021-07-15 03:09:47,041 INFO MainThread:651126 [wandb_run.py:_show_files():1937] logging synced files
|
wandb/run-20210715_030015-30wihv4o/run-30wihv4o.wandb
ADDED
Binary file (11.1 kB). View file
|
|
wandb/run-20210715_031107-69jkygz3/files/config.yaml
ADDED
@@ -0,0 +1,301 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
wandb_version: 1
|
2 |
+
|
3 |
+
_wandb:
|
4 |
+
desc: null
|
5 |
+
value:
|
6 |
+
cli_version: 0.10.33
|
7 |
+
framework: huggingface
|
8 |
+
huggingface_version: 4.9.0.dev0
|
9 |
+
is_jupyter_run: false
|
10 |
+
is_kaggle_kernel: false
|
11 |
+
python_version: 3.8.10
|
12 |
+
t:
|
13 |
+
1:
|
14 |
+
- 3
|
15 |
+
- 11
|
16 |
+
4: 3.8.10
|
17 |
+
5: 0.10.33
|
18 |
+
6: 4.9.0.dev0
|
19 |
+
8:
|
20 |
+
- 5
|
21 |
+
adafactor:
|
22 |
+
desc: null
|
23 |
+
value: false
|
24 |
+
adam_beta1:
|
25 |
+
desc: null
|
26 |
+
value: 0.9
|
27 |
+
adam_beta2:
|
28 |
+
desc: null
|
29 |
+
value: 0.98
|
30 |
+
adam_epsilon:
|
31 |
+
desc: null
|
32 |
+
value: 1.0e-08
|
33 |
+
cache_dir:
|
34 |
+
desc: null
|
35 |
+
value: null
|
36 |
+
config_name:
|
37 |
+
desc: null
|
38 |
+
value: ./
|
39 |
+
dataloader_drop_last:
|
40 |
+
desc: null
|
41 |
+
value: false
|
42 |
+
dataloader_num_workers:
|
43 |
+
desc: null
|
44 |
+
value: 0
|
45 |
+
dataloader_pin_memory:
|
46 |
+
desc: null
|
47 |
+
value: true
|
48 |
+
dataset_config_name:
|
49 |
+
desc: null
|
50 |
+
value: null
|
51 |
+
dataset_name:
|
52 |
+
desc: null
|
53 |
+
value: null
|
54 |
+
ddp_find_unused_parameters:
|
55 |
+
desc: null
|
56 |
+
value: null
|
57 |
+
debug:
|
58 |
+
desc: null
|
59 |
+
value: []
|
60 |
+
deepspeed:
|
61 |
+
desc: null
|
62 |
+
value: null
|
63 |
+
disable_tqdm:
|
64 |
+
desc: null
|
65 |
+
value: false
|
66 |
+
do_eval:
|
67 |
+
desc: null
|
68 |
+
value: false
|
69 |
+
do_predict:
|
70 |
+
desc: null
|
71 |
+
value: false
|
72 |
+
do_train:
|
73 |
+
desc: null
|
74 |
+
value: false
|
75 |
+
dtype:
|
76 |
+
desc: null
|
77 |
+
value: float32
|
78 |
+
eval_accumulation_steps:
|
79 |
+
desc: null
|
80 |
+
value: null
|
81 |
+
eval_steps:
|
82 |
+
desc: null
|
83 |
+
value: 20000
|
84 |
+
evaluation_strategy:
|
85 |
+
desc: null
|
86 |
+
value: IntervalStrategy.NO
|
87 |
+
fp16:
|
88 |
+
desc: null
|
89 |
+
value: false
|
90 |
+
fp16_backend:
|
91 |
+
desc: null
|
92 |
+
value: auto
|
93 |
+
fp16_full_eval:
|
94 |
+
desc: null
|
95 |
+
value: false
|
96 |
+
fp16_opt_level:
|
97 |
+
desc: null
|
98 |
+
value: O1
|
99 |
+
gradient_accumulation_steps:
|
100 |
+
desc: null
|
101 |
+
value: 1
|
102 |
+
greater_is_better:
|
103 |
+
desc: null
|
104 |
+
value: null
|
105 |
+
group_by_length:
|
106 |
+
desc: null
|
107 |
+
value: false
|
108 |
+
ignore_data_skip:
|
109 |
+
desc: null
|
110 |
+
value: false
|
111 |
+
label_names:
|
112 |
+
desc: null
|
113 |
+
value: null
|
114 |
+
label_smoothing_factor:
|
115 |
+
desc: null
|
116 |
+
value: 0.0
|
117 |
+
learning_rate:
|
118 |
+
desc: null
|
119 |
+
value: 3.0e-05
|
120 |
+
length_column_name:
|
121 |
+
desc: null
|
122 |
+
value: length
|
123 |
+
line_by_line:
|
124 |
+
desc: null
|
125 |
+
value: false
|
126 |
+
load_best_model_at_end:
|
127 |
+
desc: null
|
128 |
+
value: false
|
129 |
+
local_rank:
|
130 |
+
desc: null
|
131 |
+
value: -1
|
132 |
+
log_level:
|
133 |
+
desc: null
|
134 |
+
value: -1
|
135 |
+
log_level_replica:
|
136 |
+
desc: null
|
137 |
+
value: -1
|
138 |
+
log_on_each_node:
|
139 |
+
desc: null
|
140 |
+
value: true
|
141 |
+
logging_dir:
|
142 |
+
desc: null
|
143 |
+
value: ./runs/Jul15_03-10-59_t1v-n-f5c06ea1-w-0
|
144 |
+
logging_first_step:
|
145 |
+
desc: null
|
146 |
+
value: false
|
147 |
+
logging_steps:
|
148 |
+
desc: null
|
149 |
+
value: 50
|
150 |
+
logging_strategy:
|
151 |
+
desc: null
|
152 |
+
value: IntervalStrategy.STEPS
|
153 |
+
lr_scheduler_type:
|
154 |
+
desc: null
|
155 |
+
value: SchedulerType.LINEAR
|
156 |
+
max_eval_samples:
|
157 |
+
desc: null
|
158 |
+
value: 500
|
159 |
+
max_grad_norm:
|
160 |
+
desc: null
|
161 |
+
value: 1.0
|
162 |
+
max_seq_length:
|
163 |
+
desc: null
|
164 |
+
value: 4096
|
165 |
+
max_steps:
|
166 |
+
desc: null
|
167 |
+
value: -1
|
168 |
+
metric_for_best_model:
|
169 |
+
desc: null
|
170 |
+
value: null
|
171 |
+
mlm_probability:
|
172 |
+
desc: null
|
173 |
+
value: 0.15
|
174 |
+
model_name_or_path:
|
175 |
+
desc: null
|
176 |
+
value: null
|
177 |
+
model_type:
|
178 |
+
desc: null
|
179 |
+
value: big_bird
|
180 |
+
mp_parameters:
|
181 |
+
desc: null
|
182 |
+
value: ''
|
183 |
+
no_cuda:
|
184 |
+
desc: null
|
185 |
+
value: false
|
186 |
+
num_train_epochs:
|
187 |
+
desc: null
|
188 |
+
value: 5.0
|
189 |
+
output_dir:
|
190 |
+
desc: null
|
191 |
+
value: ./
|
192 |
+
overwrite_cache:
|
193 |
+
desc: null
|
194 |
+
value: false
|
195 |
+
overwrite_output_dir:
|
196 |
+
desc: null
|
197 |
+
value: true
|
198 |
+
pad_to_max_length:
|
199 |
+
desc: null
|
200 |
+
value: false
|
201 |
+
past_index:
|
202 |
+
desc: null
|
203 |
+
value: -1
|
204 |
+
per_device_eval_batch_size:
|
205 |
+
desc: null
|
206 |
+
value: 1
|
207 |
+
per_device_train_batch_size:
|
208 |
+
desc: null
|
209 |
+
value: 1
|
210 |
+
per_gpu_eval_batch_size:
|
211 |
+
desc: null
|
212 |
+
value: null
|
213 |
+
per_gpu_train_batch_size:
|
214 |
+
desc: null
|
215 |
+
value: null
|
216 |
+
prediction_loss_only:
|
217 |
+
desc: null
|
218 |
+
value: false
|
219 |
+
preprocessing_num_workers:
|
220 |
+
desc: null
|
221 |
+
value: 96
|
222 |
+
push_to_hub:
|
223 |
+
desc: null
|
224 |
+
value: true
|
225 |
+
push_to_hub_model_id:
|
226 |
+
desc: null
|
227 |
+
value: ''
|
228 |
+
push_to_hub_organization:
|
229 |
+
desc: null
|
230 |
+
value: null
|
231 |
+
push_to_hub_token:
|
232 |
+
desc: null
|
233 |
+
value: null
|
234 |
+
remove_unused_columns:
|
235 |
+
desc: null
|
236 |
+
value: true
|
237 |
+
report_to:
|
238 |
+
desc: null
|
239 |
+
value:
|
240 |
+
- tensorboard
|
241 |
+
- wandb
|
242 |
+
resume_from_checkpoint:
|
243 |
+
desc: null
|
244 |
+
value: null
|
245 |
+
run_name:
|
246 |
+
desc: null
|
247 |
+
value: ./
|
248 |
+
save_on_each_node:
|
249 |
+
desc: null
|
250 |
+
value: false
|
251 |
+
save_steps:
|
252 |
+
desc: null
|
253 |
+
value: 30000
|
254 |
+
save_strategy:
|
255 |
+
desc: null
|
256 |
+
value: IntervalStrategy.STEPS
|
257 |
+
save_total_limit:
|
258 |
+
desc: null
|
259 |
+
value: 5
|
260 |
+
seed:
|
261 |
+
desc: null
|
262 |
+
value: 42
|
263 |
+
sharded_ddp:
|
264 |
+
desc: null
|
265 |
+
value: []
|
266 |
+
skip_memory_metrics:
|
267 |
+
desc: null
|
268 |
+
value: true
|
269 |
+
tokenizer_name:
|
270 |
+
desc: null
|
271 |
+
value: ./
|
272 |
+
tpu_metrics_debug:
|
273 |
+
desc: null
|
274 |
+
value: false
|
275 |
+
tpu_num_cores:
|
276 |
+
desc: null
|
277 |
+
value: null
|
278 |
+
train_ref_file:
|
279 |
+
desc: null
|
280 |
+
value: null
|
281 |
+
use_fast_tokenizer:
|
282 |
+
desc: null
|
283 |
+
value: true
|
284 |
+
use_legacy_prediction_loop:
|
285 |
+
desc: null
|
286 |
+
value: false
|
287 |
+
validation_ref_file:
|
288 |
+
desc: null
|
289 |
+
value: null
|
290 |
+
validation_split_percentage:
|
291 |
+
desc: null
|
292 |
+
value: 5
|
293 |
+
warmup_ratio:
|
294 |
+
desc: null
|
295 |
+
value: 0.0
|
296 |
+
warmup_steps:
|
297 |
+
desc: null
|
298 |
+
value: 10000
|
299 |
+
weight_decay:
|
300 |
+
desc: null
|
301 |
+
value: 0.0095
|