dhladek committed (verified)
Commit 1729ba2 · Parent: 77a3ac8

training on large slovak corpus

config.json CHANGED
@@ -21,5 +21,9 @@
   "pad_token_id": 0,
   "relative_attention_num_buckets": 32,
   "tie_word_embeddings": false,
-  "vocab_size": 64100
+  "vocab_size": 120100,
+  "sp_model_kwargs":{
+    "enable_sampling": true,
+    "nbest_size": 4
+  }
 }
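
The tokenizer behind this diff grows from a 64,100-piece to a 120,100-piece SentencePiece vocabulary, and the new sp_model_kwargs describe subword-regularization sampling (segmentations drawn from the 4-best lattice rather than always the single best). Since config.json is the model config, these kwargs most likely document the sampling settings rather than being consumed automatically, so the sketch below passes them explicitly to the SentencePiece-based T5Tokenizer; "path/to/this-repo" is a placeholder for this repository.

```python
# Minimal sketch, assuming the sp_model_kwargs are meant for the SentencePiece
# tokenizer shipped as spiece.model ("path/to/this-repo" is a placeholder).
from transformers import T5Tokenizer

tok = T5Tokenizer.from_pretrained(
    "path/to/this-repo",
    sp_model_kwargs={"enable_sampling": True, "nbest_size": 4},  # values from config.json
)

text = "Trénovanie na veľkom slovenskom korpuse."
print(tok.tokenize(text))  # segmentation can differ between calls due to sampling
print(tok.tokenize(text))
```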
hydra/config.yaml CHANGED
@@ -6,36 +6,37 @@ predict_only: false
 seed: 2137
 model:
   klass: hf_t5
-  name: /home/jovyan/bert-train/nanot5/base_slovak_model/
+  name: /home/jovyan/bert-train/nanot5/templates/base_sklarge_120k
   overwrite:
     dropout_rate: 0.0
   add_config:
-    is_bf16: false
+    is_bf16: true
   checkpoint_path: ''
   random_init: true
   compile: false
 data:
+  train_path: /home/jovyan/data/sklarge-shards
   input_length: 512
   mlm_probability: 0.15
   mean_noise_span_length: 3.0
-  num_workers: 2
+  num_workers: 8
 optim:
-  name: adamwscale
+  name: adafactor
   base_lr: 0.02
   batch_size: 128
-  total_steps: 65536
+  total_steps: 120000
   epochs: -1
   warmup_steps: 10000
-  lr_scheduler: cosine
+  lr_scheduler: legacy
   weight_decay: 0.0
   grad_clip: 1.0
-  grad_acc: 4
+  grad_acc: 8
   final_cosine: 1.0e-05
 eval:
   every_steps: 5000
   steps: 500
 checkpoint:
-  every_steps: 20000
+  every_steps: 10000
 logging:
   neptune: false
   neptune_creds:
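
Beyond the new corpus path and tokenizer, the run switches the optimizer from adamwscale to adafactor, the LR schedule from cosine to legacy, turns on bf16, raises num_workers and grad_acc, and nearly doubles total_steps (65,536 to 120,000). A rough sketch of the token budget this implies, under the assumption that optim.batch_size is the effective batch per optimizer step and grad_acc only splits it into micro-batches:

```python
# Back-of-the-envelope token budget for this run.
# Assumption: optim.batch_size (128) is the effective batch per optimizer step,
# and grad_acc (8) only splits it into micro-batches of 16 sequences.
total_steps  = 120_000
batch_size   = 128
input_length = 512                 # encoder input tokens per sequence
micro_batch  = batch_size // 8     # grad_acc = 8

encoder_tokens = total_steps * batch_size * input_length
print(f"micro-batch: {micro_batch} sequences")
print(f"~{encoder_tokens / 1e9:.1f}B encoder tokens over the run")  # ~7.9B
```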
hydra/hydra.yaml CHANGED
@@ -112,17 +112,21 @@ hydra:
   hydra:
   - hydra.mode=RUN
   task:
-  - optim.name=adamwscale
-  - optim.lr_scheduler=cosine
-  - model.name=/home/jovyan/bert-train/nanot5/base_slovak_model/
-  - optim.grad_acc=4
+  - optim.name=adafactor
+  - optim.lr_scheduler=legacy
+  - model.name=/home/jovyan/bert-train/nanot5/templates/base_sklarge_120k
+  - data.train_path=/home/jovyan/data/sklarge-shards
+  - optim.grad_acc=8
   - model.klass=hf_t5
   - eval.every_steps=5000
-  - checkpoint.every_steps=20000
+  - optim.total_steps=120000
+  - model.add_config.is_bf16=True
+  - checkpoint.every_steps=10000
+  - model.compile=False
   job:
     name: main
     chdir: true
-    override_dirname: checkpoint.every_steps=20000,eval.every_steps=5000,model.klass=hf_t5,model.name=/home/jovyan/bert-train/nanot5/base_slovak_model/,optim.grad_acc=4,optim.lr_scheduler=cosine,optim.name=adamwscale
+    override_dirname: checkpoint.every_steps=10000,data.train_path=/home/jovyan/data/sklarge-shards,eval.every_steps=5000,model.add_config.is_bf16=True,model.compile=False,model.klass=hf_t5,model.name=/home/jovyan/bert-train/nanot5/templates/base_sklarge_120k,optim.grad_acc=8,optim.lr_scheduler=legacy,optim.name=adafactor,optim.total_steps=120000
     id: ???
     num: ???
     config_name: default
@@ -147,7 +151,7 @@ hydra:
   - path: ''
     schema: structured
     provider: schema
-  output_dir: /home/jovyan/nanoT5/logs/2024-01-02/07-29-30-
+  output_dir: /home/jovyan/nanoT5/logs/2024-07-29/11-48-34-
   choices:
     local_env: default
     task: pt
hydra/overrides.yaml CHANGED
@@ -1,7 +1,11 @@
-- optim.name=adamwscale
-- optim.lr_scheduler=cosine
-- model.name=/home/jovyan/bert-train/nanot5/base_slovak_model/
-- optim.grad_acc=4
+- optim.name=adafactor
+- optim.lr_scheduler=legacy
+- model.name=/home/jovyan/bert-train/nanot5/templates/base_sklarge_120k
+- data.train_path=/home/jovyan/data/sklarge-shards
+- optim.grad_acc=8
 - model.klass=hf_t5
 - eval.every_steps=5000
-- checkpoint.every_steps=20000
+- optim.total_steps=120000
+- model.add_config.is_bf16=True
+- checkpoint.every_steps=10000
+- model.compile=False
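
hydra/hydra.yaml and hydra/overrides.yaml are Hydra's auto-generated record of the same command-line overrides, which is what makes the run reproducible from the repository alone. A small sketch (assuming omegaconf is installed and the file sits at hydra/overrides.yaml as in this commit) of turning that record back into a launch command:

```python
# Rebuild the training command from Hydra's saved override list.
# Assumption: the relative path matches this repository's layout.
from omegaconf import OmegaConf

overrides = OmegaConf.load("hydra/overrides.yaml")  # plain YAML list of "key=value" strings
cmd = "python -m nanoT5.main " + " ".join(overrides)
print(cmd)  # same overrides as train-model.sh below
```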
main.log CHANGED
The diff for this file is too large to render. See raw diff
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8adcaa3befd91b9080f22d00466debbce77b0989eb3b038269f83aad1ee5e934
-size 1186781032
+oid sha256:d083edc4ffae1fc5025aeec7b6417c459a1654010f0bd9fee0ae6c57ab97332f
+size 1530845040
optimizer.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d9f3304271a05b8894b45869b7bae03ae655aa7615410d7ee722ab02064ceea
-size 2373662661
+oid sha256:b2d8ba594f622898a38b758a0bfc7a6bf1e79acde08d843d03b771a5b220106b
+size 3075653
random_states_0.pkl CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a429eefea0f594d70075b0d57721b18d7e6e4d12263218ee30780a6e613c04cd
+oid sha256:032d5c0c40c80570faf14e7a3a2ba39d76b91afecb36586b35a861be0d83556d
 size 14663
scheduler.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6b51bb232eab4bf0dd13cf507f602d257c3596e6828466eb03664de335c46223
+oid sha256:c26cfd01bde4900b7b03e0e9b7ec7d389da736cdcf1ccb3d46a64498895077f0
 size 819
spiece.model CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:353edf8f042581ee554bb883dd4a19e0888d778553e3fd5ca9c97bb76434406b
-size 1408434
+oid sha256:9944e5920f922d26793ec2b15ae90f576584035eecde1e9eee0923bc8c3fc328
+size 2575539
spiece.vocab CHANGED
The diff for this file is too large to render. See raw diff
 
train-model.sh ADDED
@@ -0,0 +1 @@
+python -m nanoT5.main optim.name=adafactor optim.lr_scheduler=legacy model.name=/home/jovyan/bert-train/nanot5/templates/base_sklarge_120k data.train_path=/home/jovyan/data/sklarge-shards optim.grad_acc=8 model.klass=hf_t5 eval.every_steps=5000 optim.total_steps=120000 model.add_config.is_bf16=True checkpoint.every_steps=10000 model.compile=False
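
train-model.sh records the exact invocation used for this run. As a closing, purely hypothetical usage sketch, the exported files (config.json, spiece.model, model.safetensors) are treated as a regular Hugging Face T5 checkpoint; the repository path is a placeholder, the weights were written by the training loop (accelerate-style state files sit next to them) so loading may need adjustment, and a span-corruption-pretrained T5 normally needs fine-tuning before downstream use.

```python
# Hypothetical usage sketch -- assumes the exported files load as a standard
# Hugging Face T5 checkpoint ("path/to/this-repo" is a placeholder).
from transformers import T5ForConditionalGeneration, T5Tokenizer

repo = "path/to/this-repo"
tok = T5Tokenizer.from_pretrained(repo)
model = T5ForConditionalGeneration.from_pretrained(repo)

# Pre-training used span corruption, so the model fills in sentinel tokens;
# for real tasks it would normally be fine-tuned first.
text = "Bratislava je <extra_id_0> Slovenskej republiky."
ids = tok(text, return_tensors="pt").input_ids
out = model.generate(ids, max_new_tokens=10)
print(tok.decode(out[0], skip_special_tokens=False))
```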