Saving weights and logs of step 2500
Browse files
.run_train.sh.un~
CHANGED
Binary files a/.run_train.sh.un~ and b/.run_train.sh.un~ differ
|
|
events.out.tfevents.1734083129.t1v-n-53cd541d-w-35.1088772.0.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:814f063da08f57c0ad2c30e14e1e94d1ab74b8accf7de1f9932ab3e97efde6d3
|
3 |
+
size 25422
|
events.out.tfevents.1734083608.t1v-n-53cd541d-w-35.1090638.0.v2
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bcb5f593aeba3784103714133c5a0f3ce020d535c73fd0e5d66e1f6ab0811e28
|
3 |
+
size 63038
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1421658229
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33b60ef116cd82e5520339f5cbc2e21252393aa9fc04137b712e77711865066e
|
3 |
size 1421658229
|
run_train.sh
CHANGED
@@ -10,7 +10,7 @@ python run_mlm_flax.py \
|
|
10 |
--weight_decay="0.01" \
|
11 |
--per_device_train_batch_size="64" \
|
12 |
--per_device_eval_batch_size="64" \
|
13 |
-
--learning_rate="
|
14 |
--warmup_steps="1000" \
|
15 |
--overwrite_output_dir \
|
16 |
--num_train_epochs="100" \
|
|
|
10 |
--weight_decay="0.01" \
|
11 |
--per_device_train_batch_size="64" \
|
12 |
--per_device_eval_batch_size="64" \
|
13 |
+
--learning_rate="2e-3" \
|
14 |
--warmup_steps="1000" \
|
15 |
--overwrite_output_dir \
|
16 |
--num_train_epochs="100" \
|