aapot
committed on
Commit
•
105a2e7
1
Parent(s):
3f8e7db
Add 10k train step
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -0
- .gitignore +1 -0
- base_pretrain.gin +24 -0
- checkpoint_10000/checkpoint +3 -0
- checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray +3 -0
- checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0 +3 -0
- checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 +3 -0
- checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +3 -0
- checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0 +3 -0
.gitattributes
CHANGED
@@ -1,6 +1,7 @@
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
6 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
@@ -25,3 +26,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
5 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
|
|
26 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
27 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
28 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
29 |
+
checkpoint*/** filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__/
|
base_pretrain.gin
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Register necessary SeqIO Tasks/Mixtures.
|
2 |
+
from __gin__ import dynamic_registration
|
3 |
+
from t5x import utils
|
4 |
+
import tasks
|
5 |
+
import __main__ as train_script
|
6 |
+
|
7 |
+
include 't5x/examples/t5/byt5/base.gin'
|
8 |
+
include 't5x/configs/runs/pretrain.gin'
|
9 |
+
|
10 |
+
|
11 |
+
# ------------------- Training specification overrides --------------------------
|
12 |
+
train_script.train:
|
13 |
+
eval_period = 10000
|
14 |
+
|
15 |
+
utils.SaveCheckpointConfig:
|
16 |
+
period = 10000
|
17 |
+
keep = 10
|
18 |
+
|
19 |
+
MIXTURE_OR_TASK_NAME = "byt5_pretrain_finnish"
|
20 |
+
USE_CACHED_TASKS = False
|
21 |
+
TASK_FEATURE_LENGTHS = {"inputs": 512, "targets": 512}
|
22 |
+
TRAIN_STEPS = 1000000
|
23 |
+
DROPOUT_RATE = 0.0
|
24 |
+
BATCH_SIZE = 256
|
checkpoint_10000/checkpoint
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f34bdb8f96c3559b2aa10df0336250ae19e3270a0e5f830a682a13469f252bb
|
3 |
+
size 2792047
|
checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.decoder_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b02e30ad784125e9ecc40ddf27786a86edbfb3bc8f46a51f668ee3b5c5b7af5
|
3 |
+
size 5546
|
checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d49d5808306f7001c8056a954c8a196fa69ec63ef873f816d8b8eae1f71fb898
|
3 |
+
size 5540
|
checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d250f8cd70a199e4797f733a3b469a7e7eb0110b7c31835f2421db9dc3ecd3b7
|
3 |
+
size 5471
|
checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:139c78ca54e195479efa3da47e9e2a76d57e7e1d833cd7eec1cc0fee412242da
|
3 |
+
size 5591
|
checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:81e7f913939f945cd1baf3a05ec2b26f2e69f0f24bd0e25c32a881aaa65d727f
|
3 |
+
size 5502
|
checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c3acae52d1597753f9b2f366627567acff7de0aaee8620109d0b73bbdfc4d789
|
3 |
+
size 5473
|
checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dca47bf50d898efde5d9b10fdb8b83b008ca24c693098c0cf26609f7b86d19c2
|
3 |
+
size 5555
|
checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3cb56573b3bc10b358a676e511073f579b80d0d8563c015dd0ff8927ffcfb23b
|
3 |
+
size 5485
|
checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a86495308631fe5372d96b05d2a3f9f2a860402ec111b4ad96322bd353df94b
|
3 |
+
size 5495
|
checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fa259d85404d2c6348dab818abfd2d22a87153fb0197bb42fda9d8b0e934850
|
3 |
+
size 5536
|
checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a89715c7324274863f7983995ff8945c639c2be07648214651796e987c2815a
|
3 |
+
size 5506
|
checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:72c5a77f7158105215a9509ba57a0369b033e11f752c902d5b77bd5d1beaaeac
|
3 |
+
size 5503
|
checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:162c30ff3d061438785dac40e5d6cb8a6e3f844a2616d2049719086ff42181f5
|
3 |
+
size 5547
|
checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0613f2003d094908c3faf2875b5ca9680991ab912772224f169d0701b6278685
|
3 |
+
size 5519
|
checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85ab5fa4ac6b702e9e97e14f437c7e52b832b2706a73467d1c40db33996b84be
|
3 |
+
size 5570
|
checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8b19d46610a38c8edbb56dd6263a1798dff8027238c4a16e8e6983b3033d929
|
3 |
+
size 5576
|
checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a517e404b1a6df3abe86ac685c1b4de20f848ffdd10bb3b12a57c61f7b0946c
|
3 |
+
size 5519
|
checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6cf26d2e6e6708e7e2c13c6f84a57acbb46fbbd3493b2df94e563c3637ecf84
|
3 |
+
size 5549
|
checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aeaf0ab88e801024c13a0052c0c3e8073369c859e28a72dca19868806dbd6005
|
3 |
+
size 5665
|
checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7028b3f2007ef72746d64487314ca6723cab7f9713ecb30bd30b73a22418e831
|
3 |
+
size 172
|
checkpoint_10000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65284ef7b0de15a72da6fcca4a522092b8d4301a5aafea385486e85988ad0b85
|
3 |
+
size 1445
|
checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.encoder.encoder_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b58a73db9acb2ae71f77eb4c6753399fed91531c45ee86df63123339242683a
|
3 |
+
size 5543
|
checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2eac16a2916d7d8082fa87fc236150b4cf77a5f8e4c8802cd530015cabdf199b
|
3 |
+
size 5614
|
checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c15454ca58ace9d64cc661f694fa3a5c43479ab9e9298e3f5d36932e899b8ecc
|
3 |
+
size 170
|
checkpoint_10000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:149e9f6c7a89621bdf03c0abc8ef305ae9ac74d6a9408d3ee8b0ce8c0971314e
|
3 |
+
size 5437
|