pere committed on
Commit
312a952
1 Parent(s): 5224747

Saving weights and logs of step 2500

Browse files
.run_train.sh.un~ CHANGED
Binary files a/.run_train.sh.un~ and b/.run_train.sh.un~ differ
 
events.out.tfevents.1734083129.t1v-n-53cd541d-w-35.1088772.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:814f063da08f57c0ad2c30e14e1e94d1ab74b8accf7de1f9932ab3e97efde6d3
3
+ size 25422
events.out.tfevents.1734083608.t1v-n-53cd541d-w-35.1090638.0.v2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcb5f593aeba3784103714133c5a0f3ce020d535c73fd0e5d66e1f6ab0811e28
3
+ size 63038
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c4c381d4203e6b03a768f5cd7066942ed7e02f54c5534d09efd039be2c86d2e
3
  size 1421658229
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33b60ef116cd82e5520339f5cbc2e21252393aa9fc04137b712e77711865066e
3
  size 1421658229
run_train.sh CHANGED
@@ -10,7 +10,7 @@ python run_mlm_flax.py \
10
  --weight_decay="0.01" \
11
  --per_device_train_batch_size="64" \
12
  --per_device_eval_batch_size="64" \
13
- --learning_rate="5e-4" \
14
  --warmup_steps="1000" \
15
  --overwrite_output_dir \
16
  --num_train_epochs="100" \
 
10
  --weight_decay="0.01" \
11
  --per_device_train_batch_size="64" \
12
  --per_device_eval_batch_size="64" \
13
+ --learning_rate="2e-3" \
14
  --warmup_steps="1000" \
15
  --overwrite_output_dir \
16
  --num_train_epochs="100" \