diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..4ef8a6d4335aa061664784a025de9f55a245a83e 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+global_step262772_universal/** filter=lfs diff=lfs merge=lfs -text
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..fde2fdf3214b4d7e9f5bcd9ea1c90db11605013b
--- /dev/null
+++ b/config.json
@@ -0,0 +1,69 @@
+{
+ "architectures": [
+ "YuLanMiniForCausalLM"
+ ],
+ "auto_map": {
+ "AutoConfig": "configuration_yulanmini.YuLanMiniConfig",
+ "AutoModel": "modeling_yulanmini.YuLanMiniModel",
+ "AutoModelForCausalLM": "modeling_yulanmini.YuLanMiniForCausalLM"
+ },
+ "attention_bias": true,
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "dim_model_base": 1920,
+ "dim_model_base_attn": 64,
+ "dim_model_base_init": null,
+ "dim_model_base_lmh": 1,
+ "dim_model_base_logits": 1920.0,
+ "dim_model_base_lr": 256.0,
+ "down_proj_alpha": 0.03450327796711771,
+ "embed_tokens_alpha": 1,
+ "embedding_ln": false,
+ "embedding_rmsln": false,
+ "eos_token_id": 2,
+ "gate_up_proj_alpha": 0.3651483716701107,
+ "gradient_checkpointing_step": 56,
+ "hidden_act": "silu",
+ "hidden_size": 1920,
+ "hidden_states_shrink": 0.18708286933869706,
+ "init_scale_o": 1,
+ "initializer_range": 5e-05,
+ "input_layernorm_alpha": 1.0,
+ "intermediate_size": 4800,
+ "k_proj_alpha": 0.3651483716701107,
+ "layer_norm_eps": 1e-06,
+ "lm_head_alpha": 1.0,
+ "ln_scale": 1,
+ "max_position_embeddings": 28723,
+ "model_reproduce": "transformer",
+ "model_type": "yulanmini",
+ "norm_alpha": 1.0,
+ "num_attention_heads": 30,
+ "num_epochs_trained_before_this_epoch": 26,
+ "num_hidden_layers": 56,
+ "num_key_value_heads": 6,
+ "num_steps_trained_before_this_epoch": 253006,
+ "o_proj_alpha": 0.03450327796711771,
+ "post_attention_layernorm_alpha": 1.0,
+ "q_proj_alpha": 0.3651483716701107,
+ "qk_layernorm": false,
+ "rms_norm_eps": 1e-06,
+ "rms_type": "llama",
+ "rope_scaling": null,
+ "rope_theta": 490000.0,
+ "scale_emb": 10.0,
+ "shrink_alpha": 1,
+ "sliding_window": null,
+ "tie_word_embeddings": true,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.44.0",
+ "use_cache": true,
+ "use_emb_alpha": true,
+ "use_liger": true,
+ "use_norm_alpha": true,
+ "use_sliding_window": false,
+ "v_proj_alpha": 0.3651483716701107,
+ "vocab_size": 99000,
+ "wesar_weights": true,
+ "z_loss": 0.0001
+}
diff --git a/global_step262772_universal/mp_rank_00_model_states.pt b/global_step262772_universal/mp_rank_00_model_states.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f6d60a1e2c43caf7c351e0e477de796b1ba9b1a
--- /dev/null
+++ b/global_step262772_universal/mp_rank_00_model_states.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:068960e69bea17a9db7d28394f5d4188548e03b6123f2523d3306b4ea7453d3a
+size 4468641200
diff --git a/global_step262772_universal/zero/lm_head_alpha/exp_avg.pt b/global_step262772_universal/zero/lm_head_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2fadd3958e4af900b76c6446870b9f0e212c8105
--- /dev/null
+++ b/global_step262772_universal/zero/lm_head_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24d1e97f32ee6a14d3b980485c33afd6eff0be75132a5e6c4420616ff70ba33a
+size 1180
diff --git a/global_step262772_universal/zero/lm_head_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/lm_head_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cba52e4f8f11ee551add47eebc59bf35424c13b5
--- /dev/null
+++ b/global_step262772_universal/zero/lm_head_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:488a84a8d79d80262280c382dc3084ae6a17c283073d19c6b8f3624a7e30504a
+size 1195
diff --git a/global_step262772_universal/zero/lm_head_alpha/fp32.pt b/global_step262772_universal/zero/lm_head_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..89c563072d9e35890eb01df492befdda4b55b278
--- /dev/null
+++ b/global_step262772_universal/zero/lm_head_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:920a12ef25b001861b5826242427f2a5a12cd1ab5c0994646ac3a00744359739
+size 1165
diff --git a/global_step262772_universal/zero/lm_head_alpha/step.pt b/global_step262772_universal/zero/lm_head_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/lm_head_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.embed_tokens.weight/exp_avg.pt b/global_step262772_universal/zero/model.embed_tokens.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bed75d9a6a0c67c98e20e8650fc4f6c781e767dd
--- /dev/null
+++ b/global_step262772_universal/zero/model.embed_tokens.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:421f96fa124ae49c657d5be3c502391f9e3cb8147eed8c191851902a96d22b5e
+size 760321244
diff --git a/global_step262772_universal/zero/model.embed_tokens.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.embed_tokens.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9242b7928b574adb1849aab1bb1584d1316ec6db
--- /dev/null
+++ b/global_step262772_universal/zero/model.embed_tokens.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2bf52fee54eac12bad80a185c257169bdfc0d128b378c42d44f9161ea3ea7f8
+size 760321259
diff --git a/global_step262772_universal/zero/model.embed_tokens.weight/fp32.pt b/global_step262772_universal/zero/model.embed_tokens.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..11418c051589feae7721d48f18fde76dd65dd3b6
--- /dev/null
+++ b/global_step262772_universal/zero/model.embed_tokens.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72496ea0b76d71296d456c776e28a2614bee98cb7e37c9016aa7be2c0518d440
+size 760321165
diff --git a/global_step262772_universal/zero/model.embed_tokens.weight/step.pt b/global_step262772_universal/zero/model.embed_tokens.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.embed_tokens.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6dc2acc7ed669367fd52a243ca03a5a286ec23d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eec0ee5600fa9b05480124179b95824614d6dd26ad3fb6caadf510d2e6969608
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c12f2166b73fd2991d7ec4acacbce02835da5457
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1dbb3f87cb27b5f6ff2ee19db88cb77df6116edefdb304413066049db1e2d1ce
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.0.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.0.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..176800b664199c31d612002db0c8345c589ca8d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9557f20dc5e6ff6c7b550489769bd9682302a2573064285af046614557a5c642
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.0.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.0.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a44785aa8e89db96298f609de05034943ef7dd98
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8de139b0406580e783c6c0734de73ae7a99f77e76981a4271107a26a9fb37e2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1ea03016a718fe4613f3cceaf2d8ceb9d13e7be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a7b0967aff9b563dee22481fafdfa316d0b84101c6a07aad6c3542643f6df04
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0ed0129d904d4bfe8eda919d3bb75fe7c4a98553
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6dcfec001a806aa802f80e13df9a12f8fb938d0d95356ff766172d117efd756
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..56261aa7119762dfa591ac7d21e1d6b250abe4b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b633dfc682f32f3af5721f1e3e0f74de925ed6bf41e108347972271ef13f2e9b
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..357e976d661f9f8dfccd1e1251a59c5fe83bc6ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7413a968724e9d33504e0c55dffaf905eea10453636f2653b0d14f7cb59c16f
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c63172ab1b19fad16c9498f5af6009faf2f8afa6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0edcc90e8d3e55908e1568e75c4a61a180dc276a963dbff07eae0e77f71f1a96
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..43bcc623c6e0e873f2aec773343ac99a5ef6afc7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a41d5db3d8cacbd3dbb42afedfcc7907df0a4c91ba4e95e833bdd6e961281406
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ffbe6673812c1d5ad3b509b50c8346fb06a8c95e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c67b9840fd22c47fc0db75fb4070cc7c0df66a45a6dc834942d25b202a6df1cb
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f23187f96a058b66c8d7a5831d7d46fef1aa1bc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd4e0b75f2f2fdaaf690fb830f58bf40df16b01b8c8dbba760f95a74d3fb5b45
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c51a47a7f83605cbbc4558585128082eeb9ed84b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a4083737526eb251d4607ad0d638422fff8e0e1e7f86308d6c41784ad0d211c
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e75b05de1cfdb7703d8a5bed8ad5ba659ecbbaf5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ead5e3dc0d98cfe7817fed158c0ca54504c6cc1149677627b2c39831b673c7ee
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c236511379fd3654733f769c62cdbfe7032825ed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f6aa9574d69070bb7ecb6ef7b0e8858090bc10c08aec506e4f3112e21144bc4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b6be383e927cf6fbcd8988696fa008ce6f131d14
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96f6eb1b2f763978c092f284f62204be74c428b1a6c18290ba899d2c0623df16
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..13b573301cb37f0d49c305739c6a6018ef83cd05
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d484c7d9b0de884d18649bb0a1f335884b9242d4292d5ba430056f913414a4b1
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..712cef51df24c2568806133990d076f0105ffd44
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4bb6e12bbc1413cda9cc0951e1f91dc995cc85593c668a6f03acaa9e58319d6
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9cc474a971c923b598cf1da0a04108249308d8be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3c6617a041f3748e8181289f24ceff5f2cc982d43fd975da2c8366b91bf3651
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..df15f19cec3d1a32173bcd7c38e8969e60c73d34
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f297e1d3bbc8ea0c21fc1b11b90970681b704e3b662b99473cf859c1884133ef
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..07d3e8163cefbb6f838f60b628bf56c68c134636
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e20916aea6240cd434aa26da48df675e4f4ba6b59d9a067e4a2d2a8fd56bdd48
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dcad53350371b3beb88b2511f45d38d305337075
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c1be7eadf6d9dc033847ee913ade2676ff54832e06de6640078ca66c7450a6d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf16a8cce07bba915a773c32422533465f239cd3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8783d4267134189b045a6bf632c72918e7774fdde3dbe0a4af6b64f4966e28d6
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d9daf765c64ad163efa246a179343dbf8c24421a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14dd67d7ce0a219a1514cbf1e711ce7454a70f808e9eeafb5b266e5c26a2eb0b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..00e50b1108fc3dfb8a118a8b66889c60eb73a5c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d428b0db470fb2efd3ce6dff50c56de02f6d82c00c930714f7bd4ac34e5c5df
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb28c0092df1d4ed0240e91e7ef4f935fc39a82e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55c91a7433fefb18434ce776e6dddc567b4b258b026ed78286a6ad87f8a06080
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d792c62cd8185d6b729e47103f609b59e71b9445
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f815161135baf75aa6a0cac8111d03984316e61209efd4ec2100cdc6a8a66a8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed3b6bb4e0d6c1e3ed63d0de603584593e206aa5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9c337d7fe1cfa0f72e2f62ef43cbd3c435a3c88c021608bb2d18d7e1aaacd57
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46cabf39e34b52348e62e2595f5aa97ebcd47296
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bdfe78e0197b464163d167b81c4d61ebe17878506184d7ef06442370fcbf185
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2e1df714fc7ec47b53c61daaac7f53d265b4e4a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c026e2229dedc3ec003318235c3e237568a65105983663ef2eda4cf610e37ee4
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7852f4c2b7d53736f0b259b37667641f6181a676
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3eea02ea642640e3b4647a3d844623235ab152e901071575585623870e488a71
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5e9620cfe7de39626e029eb03c5eb365a9aacff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39ea7bbad68b6a11b9a3c7fab178b3c10e6d83e882e2ad1b57653b20596f53de
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..52e3236c82717c213b49b28100cd72d367185296
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edc42e8e0253d17c8aeffd8dd2030855acb6c34daa04e4933fe3388e84ef64a9
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c2b75228538361bfb5b47dfa0b5135c4f35802f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0183956d1afed495af8995f4b5030b45c692f921de84b2d4a0a0321724162ebc
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..287da50886d141214fa47185ec1cfbc6620ea992
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7e25b42e0766f48d7af58b8d0ae3f9ecbe35804d55b75f7d3fc7cadd5f51d4a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..407dc3700bed2a45823f9be278697716bd08d6be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44910dc0958c1c21ce0655622718385b494d27d09e0145838084d71d7244a253
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3be35a030986fa1aebd73cefcf29ac64dcf6c74
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d790aaa33d5152e21d2c9a7800ecc2fda4bd98e99f81d653f7d57f513ffc506e
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..455fbfa6c65b43dbb57e0e0b172ecf7974d1c9e6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6c26e48baaa1fda3da542a65c61cf2b450cb703f6076663e025b6a2e61a1663
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fad882787617f3d5e5d31d3475774338aa27a9f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caa9514e118202056bb3ecf18ec23cd3790f31aae93c2579b731981aafb331f3
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c2a11f80739f09b454ffdeabac3a1270a2f6a04
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbc223a79983275ae6f85b62824bb9893252da55ba3b9c12723f45b4dd415e73
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2c91b74e3ae428876f5c59d8fdafe61e2f8cf2e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4459e038b75fb57003dc3e9a04867fa214ba9993474d15313c9afb47cebce0ae
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1735180605629b696190a73f72626859037b5ce
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2884c849fb57c7def0d1d5f1b40869f28fba32d3e66e1b38a758745a506d4251
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a3663d436dac25c780864006ef687e5f2f558477
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4bee74c800b746f3adc41fe84ec75f6cd2352f86d3efb3176cf52020fe36660
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dc958540b51c0628a1e9e6a17e2b2cb019ae0a7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28cd8232a13958e5de1d725e61b72dfdc8d88b68ab3382188d2d11ad16d28035
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e33bfcbd2cd93ca6b5ce227215489b60507ff7a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:196ece365a73d98da5db1243a5afe2886c9417838572465582c7e30dc4f826de
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..41669a955922c879e25e5cf949037e782419968a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f5420024ef848b203a0a8edf456afdbbb162304c2d1d7429313bc5d369c1e35
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c9fea35bfc754e17031fc30894b7f267d3187557
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ee173cd6a4921077a9309e07799d8c63e30da3befadfb996d0fecf1252767e0
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..30ef405fdf3603baf84b823bed7e76f31a75226d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ce26c32a860c9102d659205287ceadcac06c54d8bb6a03391482e543103959e
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e9ee32c00846518e3d2aeead56c66a9163803b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2589ec59a821d00773b187bc56a2bc545ea9a28dc6c23caa1bc375996ef6f921
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c86866e8f92a1cf76091babfb61e6e4e41429b3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5530c7f07b9f0502b4091092f5efab90c0092b606b3eeea2e50d3e6405c557f5
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..81c1c903c677f210f4c726de0d82364f289276dd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:876775eba34cca352924a51a13a6345f7bd531190746c65e2bba2552bcbddadd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b04ba16662c47674b672c81da24d8ec9679baebd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10227429301445f37883740a95b91502c57bfd9c9206b1446862d8396feb5532
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..586ebf0c1833fdb413eb93c3dd6106d69c90a699
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f3bc5d7461d1405ca198bb47e9ab9b87e93fd160192747192563af9571aa55b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1dceb9c1cb8383d90f963b9850eda3d92f3a17d6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5553349bfdc10a107fa68fd3a89b3b781f382ab2de7eaa7714e68be1f1fc5b76
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2938f6f94086b3016c2404de13b6f3d048106509
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cb72323c93cc44f2f6cbae51c2695e22ab8be6c6c4341c675a79b8b8af82d08
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec0ca030ba6d2ad7012f6608f4d5b3ded5115fb6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a9a9fd51514ddcdb861535b664487fe6bac63d8f2e4ef6be2eb20f9e45d8507
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2b2ba1b55e44518f5dbd933070f9c117891ea115
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57d072911a36708cd19c1a2a1b47e853ff4c0fcf76913a0d70f9509ece3627b2
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c339ff756fb2b435be0558bbe3079fa18066b33
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b811a919766622f8e317b9f3049898bf4e3d9d8b40836e72c69a93e828bba58
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8db656ff5e7e5045ae6e18fa526e2ac4329db1f3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:274fd7a26ab82698457440be6c042f94f9cbb757195604523703c03e6ee908f3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..149023c5c8f44ef122d8359f241ffe9bfa94000b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e255ff6e58ed5103bef3bd555e92f6d7432904ecdd5228a96056945d817c874
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.0.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd75c626122ab1d8067463ce8f04198a3bddcdee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0670395c24529d1d3dea60fdb83e9d59383a503e652403ad1ee4cafed95d88ac
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.1.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..312f410087682d55023982a4ba3b8c865ef50159
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:321afee2688ca23e5b66a6942d37320da1e6e3f559b5d65fd24c385bf61eac29
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.1.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.1.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..327b448b259f811767d900460d41cf366bababbd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cee137aba8b36dda57f3aeac81d0b1541ca3ed1f01c17f5825d0893d4de47603
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.1.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.1.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2afc93b09eed702f51dd984bacba44adc105eef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:243b8541b4ce880970e5bf3a11dd306259b0325d0fdf817b5fcc6c654145bb48
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb7baff4b93e73604716b9ff3fa9566b654e06bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5889f2fbafa08f9033cc9207d2a0ccfe41838c4e2a5472f8bea92b54a724b784
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cfe4e91a5ecde0fcc815bcebe1f7def53706a7ed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ce8e28fda7033219ff122b33033b0cd2d5d1fe340523ae1cb2e0920d7d8d6ce
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b7927fc09977d06a15b8d966cdbc2c0dce631ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a9148c96ea380e85b7cd777c1d516ba894b606a6f671c45c0ca467fe7da3481
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e89f170a78ca92d9e3398bd9a7fb1a8b907f10f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1fca847b060905f745e657a94de3afec98ec307f750cbc58574751da3c2de07
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a7f284aafc506526ff7ff23b718150b5643f38f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6838b1c05fc2ea9725737ef7ff76e60aa520ef823b70731da534bc93324e371a
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..da0c98ac3f52be48b41b7949d6129577195b9132
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1fb27834f8a9577f15a0a8d691888f4e3591ad656540e28d2288c19d6ba855e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb3d2c5be9d52aafeecbed9f57d7a364543fa32a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:626b73da9fe435a47b775ae7617de4014d39154dbd8f2deba77915170dc5cb34
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fdf7f10714f3bf07b601244d3257ff07eceb167f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8093de171d95bf59dcee929e2647700fc87ddddf2d4cc443a4604cbbaaa5631d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a0b6c5ece18997f39b5004dfab42e0e2bcfd5c3f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99e58e1a6318916c0751aecf7c1534902e48fe9ee1ec8e7ffbea1241834cd6e5
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..34fa3d830c895a7a46e4b548ebf8863f7ea1ae1e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d7fe15633addeb16cd1f481985ffafee6701e1e89f983c4ff3436fc588f41df
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..009b3e784efefe5335d121b390b8de729d6ae205
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89cc87d6793f720ed9ca942c808a7379976f962df943909fb786d6afe43f7416
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6164a0a44b64d90b121235a6a52145f3104ce92b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17454db11ef84ebd43fc4bbf53c7335176eb5ca6eaeb429a146499192b1dc26a
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c963d6f6efcecb3fdbb297c4b7252dddc08c154
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a469148f8299f64fbfcba0701705109879c568cd598ac92f49749906602f87d
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..106833a7a327578c65edb32c1a26b4912e998ea2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb8748947adbee565b537c07bdf5201ae2bc8089197b187c38b8a411534e204
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3bad5af6d31d3ac0d31cb93912c05d85cbe365a3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7c0db77b70794213f01c09135323247ebfedbe14a6b84ae5cd4c681cd048381
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ccca1b40139cea02d23be285732b16ef1e8a5b55
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66281239703c195b7304943bf0b2043af7a04750ec3d2702698cda83c3e595be
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b4e770b4869f572dfcbef0cf4d8760939fcf0d14
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eac6aa47c19d6649d2e7bd5ad9559eb6fc8e100604a4f7203ab77022af744981
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..40747ee8a16a2a6e296651319f4fcd446427d46e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3692ded64439bf8bffef2c1c192da93e956d99b32dedeb930f3399e7045d5689
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..83b2dc4210755dc00784c5ece1618b1da6001303
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd5dfd08c7671a014815e23d774219d69037cb7530156e29c0d3a3c94735660a
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..875847dbe67566051f58e343aaeac3a6f791976e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21f7cb9f11c5ca5c247b943bdebad0b4fd2d38d4f70849cbd223cb1eb876b62c
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa31ea3cf4873fbb69d0424e48a86c222e0b3a4c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e3fb098cdf2a82b708bb534e2396fa29f082075a2261e2b59ba6f650abd8ce9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45031435cb0c4657c70f6c299814b2c5bc08b057
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a10f084f88a5a0dd81f321fa58d923ded8b22aa67928d9cac436f5cf72d8497
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1248406d225859b3c0e33e9f5b97c9af921cbb98
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:000f1c8a4a62d400daffdf9baefd8a16419d1ef21c95fb5a92969b1b4e9e735a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f0f62f5dc07acae8fb15252d0807b7d1abfbeee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad18d955eafc94f8855bfef12dbc9a3c05f97c52c4fb543cec085ca349b2d3cd
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7251228cf613722734f827b8ff745cfded274c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5f40da55af479698b1ce5d73cdd90fe4c1c12ea5c4166e8aaa1af0a819c29ae
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a47c065f612066ac75e94c44f0eb0dacf2a8f95
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:600c2b526eef8948c85f17274c67fbc5dc3efe934f684bf1c57327d9f55cb097
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1e675424ba6e9051b6b968911eddd00efb8fd036
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:680671f37c6aaf2a029808a95d675fa516be41466d32b5725c7fb6b5c2de67be
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..face0f3c8038befac1de9a14aa329ad14261325a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee7b54aee35364bb6d3c6ee45a6a333e73d4b259f473cbb702e5ff7d10ca1e49
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..30fa1a77e855e4d5b28319aa6da7c592edd57d1a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5bbb4655108dd36779f0853d893dde62b7f16fc339f188118e4c4fb5e193817
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6a197ab03e5c1e6a86d2757938ed401a71e23051
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dcee1e6112f4c38fd83955bdd0e2219d2b29bf46b605207e79e074202dbbde8b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3133692b4659d581013adc5000069cd5e43ee745
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eaeab2280a85bf419f5cef595c4fa0f02af225743393a75e3e858858d5b41d10
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f8cf124825ecbea01b1b157364597628e38eb7f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4417251a89c9eaea53c72a2fd37eb2a0372a751bc263295640d43c7ebc30d40b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb427ea5df20d76f5cadcf5f7a29e406ffe89c90
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1a960e0c97561b1aca776574675be43995205c51de7b8ac125c4a6cfae0c204
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1acf3e01088a0e6e4528a0d5ad8eb7fff5f03c6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9755fc5db85d217a176fb849cc13c119fc887c4ac466f8bcf109cdb8c0fa23d8
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..232abefcc368d4e18303b0379fbb5cb995028e5a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8197f6d8264b8fe156015ff98cf1ff81da66c3b05b5fdf8ef27943a949a49388
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1dd488b48b9a400600ded3d4ae7b92ae6c7c1793
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f1c1b86b524fcefb3f967cc47f580e357daf6ec8c1ed1c2be94b3882e801208
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..24e763660a0f56eec93b5939fc46557b0d5e69a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf8eb0d58f1f7b1c4bac0e7015c38e2c942ff07bb192212b1b71a2e5d730f4b2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b82e5968f6356b21194ed1c22ab48ee49dae567
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb65448fc2b91814cc54d049e7a506c779c998d774b070d5849c8c122c3ce3fd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2700ff55872fa8f9ca189922c0392f06921eb8a4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d95b84e2bec51fce9e990a4c738df9d77c5b3dc9a6ebcd58451f6cb6b84d837
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b6377806be2451dde7c748e41598cee82a9bc651
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e5b053353dac57fea1c978a3b9f93b7c7f83f78c4416a103156727934d4d7b9
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45d308d8af78c9e225d828fc4fa7835125339727
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04ed7608716614cb6eada1255e7a3e2442103e656e543925b940acd94ccfed54
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09873c827811b796cca6d9c3d1db54a0b0c2ee95
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afaa90dddeea461bdb3652d0ae1d37228f72e55945830b30f753a7763f95e5fc
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5686be251dc614d3900877741b35167cde39b3d1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d44bf9e22e48dd066ccabf31d18ecc14deb6578302069797e89c7ba1218bd469
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2ecccc8d2e91c7b01851091abef56f2fc3fd25ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba8a194b89f46855016f6b5c69eaaa2cc1acb2495e4d683fb3e8ff15039edcbd
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab8014d4794bc6d6967867c1f4c21a201512ee60
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37cc3b76b15fe1d828b12a15f483b8ce48d8491e11148269818312d5d9528886
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cfca5bb0515d08c3950e47a6ad930621d2fcddf9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91a5396925790a9931e81ef8895b198c67cd51c53ce905c37aa0c03a1b27e53a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..10430422f79213f75825e439cd68e6160ef0f822
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:569725783e37a6a81b56ae9fd1f0bd94ccfc8122e0e9b83f029a1f09e952ed06
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..79cc98f7112cd945d8266934541319dac7b91231
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dccbbb572a8db00d3c46628052fb01d0f23a4aa1a6ef2fa2368c1dc1ee46e1b3
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ac288b80b872060f49eb08bbfbc1e81ff3919570
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1eabd058871460b23b38b0d9f3d23249e90fc0defd39bf0d7970c95da8ba3ee5
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2012945f6b76816d3ba4343720ae4e00261cd75
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85c070aaddfbb7c26a2ec4aeebc84bdf831ea8774e01c8b2f2bff9084e81d36d
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a850bbcb1103a14050992e2760bc53e603e0e334
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbc2dc3c1aa9efa7d7361a7404c84f98955751d903a87f153e14e952cbf7f746
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bdffb8a795dc73581560d1f38673e9f6edc38bf0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82937c13168baee6087b23f3229c850e10e88a075d8e01c2c923355b31a44121
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9bd89e8943edbfd78c3daf03bf671a1276893a24
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b3cefc2d81798c137d5313a02ac72ae511b6bc1c39f9d1f8cdad7eb66070d44
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9745611665fa755c536e67094f5a039b89541563
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebd84e9c5eba8ec82b019e65b4cac0737e20c54f7918579d8016e30956d7a803
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6857aa454014ea252844ea8f96649167ddd6333b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b074abffae630a70d3abc24abd92730b4a8b05edbf2deb933cfa6b0050ff46ff
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a8cf3cff8b7940d64e8bbf04c7c1c2144c5d5f1d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36da642bb4a141351382bed10a41a055c4bd0965213396b0807fbcd695ec2a82
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.1.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9bfedf01244bc109f58b89d5c51a4b24bbb34749
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a71d3e3c6b7618b24c13a8f73413fb6c7c51b24b9f79271688ba77629dd495e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.10.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b0ef1c1ceb6482d2f919d88758bcc448278e71e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d6891df5e04cdfba431cbe2133b83c2005d26b29dc3655a5dadf54cf8c9d709
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.10.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.10.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ef7d96c4a4932f09d73a0f4cc9be4de456b797e6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee8bb7af578838e3519316773bb455c22093f6badb6f4caf8016abfbc2e64204
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.10.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.10.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..23af880aaeeb655a7af02a1276db09c4737108e3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a77ca884376b1b6127b320a1a623e171c74ce9ebdb45cf36337a0518c99fdec9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20546c4100946648b3548c366a292601628008d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d2269d0ffb13a29f2d1a7f0d0239cff3580ee1d54fa0264b8e4d6485c38e8cf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7cadef4c603d3aa8a056de63ac0145071b8fa01e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb3150c5b9c99dc8eb1824d30586f66bc6dd8523368f96472a67e2f519ac8f22
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b301484cf2237cc10518fc7a5fd37910d1f07c38
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5a866ee7b9ded694996c5f20cbf47a5417f6d48cb2bfcb341ce89046241b93d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f88bfe4ddc57e93396653fa32870ba5f1f239d2d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a656f601f1eeab2657c89c025f28429eab8d48bc4a3ed4c01e3a53624013766
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb18acde36bdfad4be695369d244a0194c6dae67
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cc51b75954d0d866d9c5a56ffea21ea2f3372d8bcd7b073df41ff05736939a8
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1aa53d9fa62abaad43518aaa326b222ae2c5b21a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ef12e2d439b837b15ebcdfb686e6e049a5480e7f86837137c92549fcec9fbdc
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f046e16d48700b169359fb4c9bdf0ceca200d34
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af361ee744f25cc634c09ffbc86befcd0e5ae47ecbb2a906ca89090ffae88a54
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9032f95156bc0a5ea512f8782c82dd5346c35073
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc7140b58468adf59047a7a722bf78de290d3516093ac58eba2780bee25ee60b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..873c832ca8d3c5f85ed60aa0d68985ad2229b561
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54c89c2894f6184659445a661c9d117f7d190db479c7e0df59dc7075b19f0706
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1ef47b677afe2572ff89aecbb0ace08167941c3c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd1cf5d4c604d330ee9fa87c77b3520d98841220ceaa9fe9d10f783bafcbec85
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..229f15e3a921bb822dbdd9a5d38d85c5ad38f9b3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0dad7564318e6c27ddafe7bc4b8169cfd5b785aca937d2f4db0e95febc0b684
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..569d856ac0021f2f1ebee16b507c91052926494b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b932f01b4d64bf8d2b3b82f976eb93ae7440f012925ab9cfb29cec4880440bf
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f88d32870e3c0965d031e4563176eb918bfd5a66
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19c54b196d448e8bebe0d9d7ab3dbfd15be6c4a73e4b7af912bafe19b81c27ae
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dc5ead9e9162e26103a4ccdd9ea1e939d40d7184
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88eb7d879586597b35784cde3f1b4291e2396e910a0f430aabbe4c3b8fe72fec
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c3743073dda7d779dd78adf53a24a1d8509f209
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:011f3786ebe11dd780ddd034309e402a14f90a5c4bf4f7f7a8483b9777d1c148
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..218c52104ab7714a158dba99057a054a97624232
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d2715a31166783b7029040f25a60ec1728b12587fab251af52f448d4609dd74
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..523e4df723ea9988c9d85fdf6f8a94a16b62c74d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85c2bab29b9ba0ebff107e0ce94da57be64072053c48a68e72b3cee52b888109
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..80bbdd913fcf26d1321ec7b534d588df3e1b7256
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e94557155c46521e875ce9cda730dfc3b388541b23294ac6b65f26f4cc7da60
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e3e5397ee9230164e2b8ac40ccebb4c784258309
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:569bfcf5266852080b026e13ab4b2d028cb50dc68e0fc02e3ee359ab31955d0b
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0fe798020ccf5e5242144738220a018e457538df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d55942b2bf54ac38309c8f584465af3a330c368c9204ac9f54c5f7f3f257b081
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d06229ee21fe9991a0aa8c74902c430e8f9d6bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c84b563147b0035b657e0cebd5e7b14729382992a26ba67e8bbc6eea553d4510
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..97b75790980f8539c88dad89635adcc17905d8bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c78282c97282bac86d92c5c1d0a49efca2720e6a3f1593679907cab3d8a5c20
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25f0fd2daabc0cf7fdf7781a9fca29d923feae13
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9a78ecba968a37e0f4442f2b7b47ffc9b314ac4b9e7a4c5ba6e41b8aab194df
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1cfe44611f9df97194418a6c98bfc5fb710bad9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b3d160fa2f9fda5267f9ab2a006c9aee0456ce7604f7b1de370e0945a3cbd46
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a182b86b6f83b5383b02fcde9841320d4d8b0760
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1372b2c0a7e956a2d337d2930b14bfe5b2751e24ec4b7dbee3e0e12ddec624c8
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5c53ba20da9da40c41e682136b8404f6e1980ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f318d4e512275e1eccebf7ce94fecef76ce6fca69b7777c82c2a9a478e8cdb1
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4eea924512c798b0523da91699370ba0f2a36a2d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b95626d6381f4f3887ed30f14b905d97b784d3bafccc0fe9d9a0c5361a8187f8
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9fa346efddb3859b133173c33ce33d5989c90966
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9f8fab914acd7738e8053d87e167f39be34622152f3b0f4432f842a9b5d13fb
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..652950bd607b36ea61739571806410f0ee0c0fe0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2f0bc5f2148419db8ef4daf8e1236a178cae0723c06de077dc5f77876b26587
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2f3b3528fb362b945745ec4251377f818f2f15e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e86fae0aa7d04788169bda9b4b931cd1c600d01b7ee5646e21b261674a096a50
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..170791a578b2f574b1ac68ffcc29605cd4cc4c48
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfa533b886d366e88807db67aa2ca441a25f53cd1beff151f3f4b4417080bf3d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..593a4e76fcd417299109046b70c150e7cad112ac
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26fe8f58488e92d1163e76b478173c5d33313cdac182491903fdba64cd4ef961
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..78ed8e6e4a8970ea3ff7094136407f98044f7869
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a80bbb524715b901c3a16953b38e2a428ad9d479df8efd1bbfe3af591c018985
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..da914d97c9f9e52c3b94a186dff10cc9597d3e5b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:755cd9158b96ac6e6bc53556d0b7f4c9e6a92f3c75d80eda7b1988bc5703f971
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e0d51c1662422169472f56b1c8becef91b3a1f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3050f4529f925a3aea7a0f2097769104ceef5d16dabb9c7baf05dc35c473b55
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c43af3d62f089db293351554e7947790a1260431
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d371a6b4c3aed0e4ccee6360929ac7aeaa0d6f2167f9150673ef3e1e817a434
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b486f497ea969f94cbbfae35cc5b8ad48ab7e4f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a90b925c399f27463ddae1e64a72d2ff05e58a52819a1b6910ff2f0e33ec5b62
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab997978ccfe19b705d3ef63f14356716a101728
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b029ceb01aed489c7b6d535358190e348daf0b33a9f85c22c66d14630c776a53
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..528890e140c51fbbe05a3e21c085e263f911c57c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b73beadf68b05f8a379629d89ea90b5967648455cbe3652dd8bf68fc7a1d5676
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d14f0b904b048198e9522cbcef03e145ab78ed81
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4403e956362cab150099186d8c774a75a8897e11bc801badbd30c7299e619982
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d575638423091dd4b638ab14e6d9f2cf0b30d361
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db438493794ddb37bf574f590c264cbd7db01c7e39dd75c33d2a6e76950e1700
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16ddc1e97a12f144b27ff473bfba83dc7a329866
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22d2e73c29567d5b0b1d0f7d1b9790d3d75e4198289321fbb4befd9aaf6b525e
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f2454aa79455754333384ea8b75dbf70b0baf912
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:006111982711c1b64a67fb4a451adb8747bc7663001f4b2e47ec19f829055051
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e609f0571907c6666272b7e6fa7fcc18cf8a3ca0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38fbb7d1ecc3aff03dbde80b59d4da5af5e729a7884f4eb2733dd893c8e28b95
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bb8ff8817e5bd02fae69fb4cc3c031393c38aff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87bbbca19b03614cd584a25de86787a34b159febf2fc83e708f4dade97fd0de6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b986ebdb689337c8f5bd2c336b54893db94a3216
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fbac111c218b1ee64bdd91b941549fe9bda90d8849abe2dc96d0bedb5237d22
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba44e907eb48d47cb8b52c0fba0c7785c2a5f00e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:180b6c5fcd270855248dcd1988292b03ca6b3b2f68e51b9c3108fb9328711cbb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c027cf23cd51d5c94a5bf2abc1abfa9b847e684
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:260e842933f3a7345f06fe1f1e7edf323c80bf3f3e0cb5573ac9fd9a3024cf1f
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70a4ac6e28be5462a1cc1358be324d0c894e79c6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd546905864f1bad3f2e643e3ec42e2101fc669f6c3748f23d021095ec3e0bf5
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dd0cd0583b9910ce5d719cb89c6c946d45c4bc4f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:854f59e67e44cab349419d83aee283e0f00b8f1a1e313286cd739c6ac26fc866
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6c0a3f63a8e2099d6ccfdf6e07dc6a97ebea662
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bcc08cb26dc44542ca978966dd4891d8b3a82d5b54855484b6b9c33d9c2d5e8
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f80fa957201db2a922d4b439e0f704fbb52d1193
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c760f5a926227556e4e84e9f8d51530d44c4c94dc79157438cfcc6dc92bb8d50
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2b3921bc9f53708858e329dcc0656bb739f2cf97
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4159d3ac577a11e5f88e87329c4486a85c8dce6c2a51f24ffcfbbc6b46517400
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c90cdd231f9924965cfceed6760baa85a7d44a0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ea5a35184c02b1bd4e3e5aa4a50596d86c008ce9e97e91943e493d7b8a7c318
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3cf236a99e5c72ee23f6480620c875b83d4160c7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:659bc32a71a7d0e2294cfe13004c0f8a250f1f10553ba0862358671c8e5d3cce
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6e8654435ace010be845264b10981c1aa9e89131
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f233ec63877d66aeeab708c39bcd511396d4a7a28be4917e734b83a624a96f3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.10.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0239fc1e6519f5fa45d1c5a1427680bcbfffb014
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88236c3f7a9925ffbe4498ab38af3c3e3b0b12923c812fa9e1112efbf383a043
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.11.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed188f7fed8eb9a183e65043879538c94f504df5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22c818daa58a1fad30ec79a6d4fb89fc863ddc849f03f439186c6fb0fe768f58
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.11.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.11.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa6aa62c89cf88c0ae8c2d5066908b2c172e11f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de7512ea76f7df4e1403c776dee5acf82f3e545cbdf919fcadf54d959af5cd15
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.11.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.11.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..56547a80a318e86e906d11b7dbe97d0491ef7a83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7b73643243648b0c102e69ce529b6f504c6ffce322c7d581e0691078c2afbba
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ddcb0f1741d9499def718220fa6cc59a20fcac7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7aadf7d9708fec627f63718fffd5f6c0525a378c1befc529102484990f624cb5
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b7db7b68a62e85e554cbc6ca31469cce145e499e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb6a9358c318a1d7748a1bf531e1b77e22f8fea821e2db8f8c8236561a58dace
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50a3c457cf65009e8d7615fa3e026b92ea18b02a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc3987c005e782e62c2a9b11ca7d39f47a15e5fcd8bd9e68a10d6c5fa47f8011
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..712a65e531cf0271c826c12b1d871d6caf04a546
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8410c4a40973a1316b44f0f034055723f4cacc527661cd5f52272ce38e170d29
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..91c3b77822eeebb80f4d3fc4b97185e852a64f36
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29a2dab72ce8a9fc587f17689a2d495684d0ca8c8968ead9bbae6e3a4e7f75b6
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b9e0f76e6efc3353f8c53d3f89c3dda210fccb80
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:952a2c6c7a7f72ed045da0d22e2caed926971ea079f52558c135b2bd84975475
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5df7d8f898512aa5d0ba50370c13efd9a163e6fa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14df725d3b3e8d24908f1af8969ee641f611ae11cdbac3c94043e2e7db2171e6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..efb2e0627ab36596f5d6ddc286a3f0bdee48a84c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25593a26fef053634db26f3d754065f0b2c3e9864891fd1cf850bf7b4c011a24
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6961b35c228d9c716e53ba12024cde96fb68e9a0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae48cf1a4ac6895b85011b9a9e0685471f9258634e858f7f417a00fb0047690c
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20030eb77a0b72858211281be04b2bebb4540750
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5a7a474e3fff593f7750b3076c72171e05fe29d2d6aa595b8e041afec9c2253
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0e72ba429c196ffab874bba65bfd054c9f14b703
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73be8c1af4f26214a81815cce8d9b637db2061de1ad02c355396912773abb32e
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c82e4cf0a526356698a4bb15c69682a08f98a7b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60a8c94ec9f9e814d55fdcfc53c83b51ffd32026b0c1c7473e177bf7454dc368
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8a6452693dd13ddc024ff1f553cd9cf6385f0ff1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0854afd2bb59528da7691ef3863d7bd5944a24c0695dbc879f5f689877e6f4b5
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..673f881d4719a3dcbdc9815b5541305722768309
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dff31ce811445d57163e87f71042ab585bfc14cdac14c2f6b7e8657ecc66cd03
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d540af7a2987bfbd3393d719023ca86b6be47b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5568e2f76c34443bfcd07d21afda92ca39442e7eddc6056675d4537aad44e05c
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f6848a3cef568385981b07d9b0392005d2c1b8d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32dfc956d136a10d3bc2b1e6fa7d5d8155cbe3ddfbe7d3b87c282960c031d74e
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02dbbc7d3bdc9e5cea4e9a8d45cd11c4e81cdff2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a35748aaf9261204fa421d07ec243f9e8760dd0b41e790afca32e4ba4eae0b35
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..665e374010e31bfa8094e8fec19875b66417567b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74e4a09e578cb5d0fad56dc435e8346b03a50ff579916f22f0080423f16dd302
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0021d3a1176137a5a100895f06b96072bc28562e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:097c5fb03372886ffe1605f2774ab38d1acd6bba20e6a2cbb5b3b8acc2ec3388
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5734b98b18658a7d613881ac38df6e9b6fb918c1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcefe17df74f2fc9a97fd822d8531f27c43b02b0dee684e3ab15d10e5a48ff87
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..438d667409aba58c6abf9e45cead9785a9c7ef6e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01d5a1277e92ded07c80a54cd739e10ac46bf968e57b8c28be601ef8847c7a44
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6a9c5e82daac0abee4d9172696c3f4d70571ef6f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:381fd1f61f42d376f9d00bfc0011c6416a6cd78a58d9e48bb9b6955fd35554d6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64afbddde1239ce2aea81ccb72f82285fff575c3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19c8cbbb5ff9178e2415bc637e516e56cfe857e1a2a80a623d576b08bad39ca3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..52a8ac46eb4e1ee0820c9bf712bbb0d4ab7c287d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8278d35f756b35a1fd91ef7ed3aaeb077a85e07f273d09b9b7922ee87c07bc6b
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a4f2157e9f7f12be5ce5dfbec5542bb80c38ac81
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb90c8dceda30d18d5d7a50bae8d69d4fd9fea0bbbf38578801c64696ba552bb
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..daa303cae95208f41570f56b76cda45e7bbd3fc1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3e548b9b536fa4cde4405b4567f605d5caadc828edb75375850f46a16f4f907
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..873b239cdc5e3a9cb7a9696fbdea053b95bd9df4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d9b5003a62c51bdaca311973663c72d8c59507c2b019045825934b413991267
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a8de7cd600555ef7ed946db3a8045d63203cfc5d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fbdfd27e95628ffd246033cdee125d6b212ba0ef5c0996e112570579a529e59
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea4eb04ac473bdf96155233513403b2415d0fa2e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9076f1dcce000598f7dcb229181dca776db597accd31c1d0239b5f371c51721c
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..704ad3fac6c32301f07665b306a219c552653d1b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:210d413cd2949de10bcbabdd74b95779a731848a2c4b9eaf8890ecd857fa6a19
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ca27eee22cd26383114151c810a8f7ee67d82ccf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d79c9f310778d665586145c627b1abb96780735e5504db28cd3beb80cb044a2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f5799cbdf04e3afc8b41b86ebbbe105304485e0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a99558fb3598da73341dd262ef6912b5c843e067493afbee3f52aa0607aa2c7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e505a10bf6a6e0664e16bed9e3cb49b3e66f060e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1a3021ba811e497aec6d95133d1d8a5f8d1185dbc6266face4d69c97dda3702
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9ea3647c6b309d6bbd242a023348940bf5b4a629
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2211f5ee7ab497f8fc4b0f849fa2a6dd7e50bcff01af64cffef95f1a556ee5a
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2810360ccecfb0c43226622ea5269c579a8c864e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da4576fd219d307dc35fb2d584b07d8ef3325c6df78a11918b1fac0b226ff874
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c61584e455c08734f0819da2b24eec6571e16983
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c34405a8a9e3e02aa6e9b5d3bda7292f35036e20c715f27a6b3ec9e83aae89c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..452fa85a3f4c8b08ca8a56ba4573bd0e546d8106
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:667131c0ad7738d89c184dc63f6d167623aa72eeaecee38042d46e2aa52e7d44
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4df432b5c901328f71d866c2c22760d3663f9e14
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3620db2cc34a2ae5719ded0a56ddde29dac691644a71b473d903ffd5b8f395fa
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa485ca28806832e0051468b2c1a08027f3a022a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b74f39aa4950472433d572fa842322b003267d3270882d7e8062ed0a69195bc
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e0d7272e987c7584093d4c98380e372e2c8391c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73fc85426d1bfaf827d6f2c238aba5469743270de66f1f7f4f9e8a0073aa35b1
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..052be7d820b046c8ce471ad64661ad5c3d0b8a15
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:668beb27eaaeb2b2fc3d127d91fa595440c695d247e06798e779b92345106c10
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..efce9c99f6ef0c4945b812da9fa0fb0aec68cbea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df98f8e4a3c7285b4d1016aeb5b792ca22711922e7a000c33d83050ef0385d0a
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fccce2acac0b445a8e342f64f2c88e14b1dca6b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1047759fce2e2a74998bc003f284e72daa49bb290ac3d3b1e07dd0c08ef037f
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..348870eb5c178635e51a88f9190bf8f099137661
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2ca370564989252b2ea4d14793c5db2025b01b94574703285686750f0d21bda
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c74a78bd93f0f17af6a982d58ada02a384a4460d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f9d0d07e3292c57e4c74a67350b4202f831f3a1cc2bc915648b1243a4a884c9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4947d3ca5296572e59241a2fbe72f6f55503c34f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:820181b3859e1a1af512b05b4ea8a89bc973fba376ab0ebf9d9293da0eef5314
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7251c3007d7912e9886cc6b04d0ace6f9b9a9196
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:279e197edd4983a05c4246eaab102675b522201a990f2a9e72a97a9501719a62
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e5a5e6ec662b99d799018652653e5b80f7868d7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2231d23002dadb8887a004fcea69067966c764931940007633305ce2c94d601
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7abd0581705144ccd1e1d519d94be7781347c92d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d5609dbde26d0db0ef29f6091c7922bbebd4f13e0441e82f480eff204525002
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..42e7957a5070dd433f317410d12b3095963f70df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:404094365ec63e85060e512620537b4dc7b37489a079c5c38e7e18971811c83d
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a455a896e3ae017cf04ea2800310717adaaf3fa0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03a4024dd7930ed93b5ee25e471a0022834a4db35451c392bb840c696f5b6d71
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe6d40d65bbe961f8dbffb4d28a1d3a136a31f12
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7735e12d4b80d0a556d36ac4da95fcf09178ae80b6ef2b52c1907951b8f8096c
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e99cf5de608a9ece1230e2ff6739a8af61ce64bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1aecd1040e8ab7ffffb205f83611524ce8a693c4fe62fc0944a0de576e3802c8
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..12eee92009ce0918f4e2a1394ac023cbfba82e36
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:270490a29c3aed930a53c2069187ec2772cf1da050903b6190411e66cbf5c01d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cec99b6ff33ed878560d1a103832e077802252a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8943cc67ee998874d2feb3e329396249280f1831720433f9bfd34ccc8e5f86f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee4f105a370218b7fea1eab72b8bd1df9c603eef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74fbd16c223432a7992b2020b152a1ac33fc219aa1712798315e38da8d189d63
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.11.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2f2fe48d4e4686bd06dc4bdeabc0044332355d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82efed5baecd50fc5aaa8f33ff74220ae543b04235ce19eaadc876411cc2ee81
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.12.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c3bc129e3d87b4575e8098ee40006541185f145
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:511c8f2c46ecc7109dfb3d23f7f643c8e4fdd1ee71716d1cb16224ad932a9e1f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.12.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.12.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b4984cf8618a3079880d0cbce016e9cccdb1d5e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a281910625d1102208f861f7577b26fbba3991e9e531ee70e58e57eaa835489
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.12.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.12.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..465d0d510c9077b92949ade7b518a4f5b61a9d81
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2729f32f80cec681ae51db315524d53dc27e7340c5f1487a76693c714b30d736
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8cae98e96d8a5fcdcd944b64f4d6c8ed09dcbef9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61a6d60a4f33f733655e27cdd62e4f4c1afdbdd29cb4d659914bc0d33f818dcd
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09e6046e1ce6dc891d703d238aa110e1ea797bd2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c74dc1a01b9602b972a208f133904aff506e1bf51130f3af38504600053e59c1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74b6fb40f248bd740e6ea1df0465a27d908089c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:719e7edd9ebc2bb552420cf58c747f28a9acbc2092e5a60213aa0c6cace4e5f6
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45a868299001c67b34b972b6724f395cba94e7bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84afc97ad8367c76ad44ea97776f7f177211d143eab16ba0e827ddeced9b0eff
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..959f0211d04f7022bcfdb0ba9078799bbca1c0f3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e880b64c9917c2f7905792cefa68aa8e7d3f7f2c6eef7018c3df54a0910f4797
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..255c6cbacdc3aefc6283fd9e0d371c8482fb8bcd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6791955a7785b03166f03839319b1bbee8ed9f5fce7788fe16ccf08ea0680ea8
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8030019c1de4f12cc6b51153ffbc07d83911e862
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aec8dd728911ccc2d9962401c5cb4f369410d6ba12e895971099cd781c415db8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0aea37e1273aa9a292f2382ca09f5d398393f33
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a877b6bdf79554572a83d3fe34b9414233acf9c1fa9f51b20e43254ca1060fd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1dcbb91c9305f8a232b8cc38081cb7d9b41bf66
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e53753225264714f9e414189248c63bc44dfca65de500ad308c2aa5f2b953be7
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..036b0f207d379e6484c96a809590c6e703ebfc9a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ef95de38e815666c135feff95a5a833f5e165694f7fd9d9d849d3c640d1370e
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d8dc633ec9cb312acfcd8a744eb9b3069983cb0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e40379f7334992a283a1066476bbc343afab28f7e655313f203e65f912dda8ef
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3b0c9c75be50f78b3cea32ed2970bb8c22c6d5a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e2740baf442a0c3e3658771e9b4a294f99f3ead75b1685094671c4447854677
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c80fef4f1f9fed7c292d3b09126933ea46036a34
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c54354ae8a2cbf83d2bebf57e7f5b3129a040bd4badc009b9aab2297c6a59e49
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..129a5208e8c3dc1942c9d70160329fd88dd71771
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6de6898972d05ffd0a1e4263a49be06fed5ff06ce3148b289fff7d49dbfd6e29
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09a8f8fe4ab429b2c95f7cec31b8f8b974542971
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a60d8f443095b9a64c796681edf4887b6fe5b463ad5ed8181a800bccdf1e50ad
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab6349b70e40cf910b2f5d3938e99c89a900cf13
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:389c461f8e55269d95750433383e5d374bad714c5f446278b407af3ca1536193
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1022e3293e5a5d0321f7d00d8f82a996927219e8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3b2e4edc6bef0e754afd305290cfc0af72688d9ed9679b07756415d5b22c1e9
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ee59065fea5f4cb5247b09600597646d2f026bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8698502b2c550b95b2dbd1026581d02f2f490c1bc9321589c6d400dd4e6722c4
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25dcd6dc0bb6c7863afbdfefbb1702a549a43130
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:858412e4579772589215d53dce3c55379d5f5f70a59f7b707086d0361e5257ef
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d20b2facba17a1c4afe23076c8bfc2a9b921cbf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56c9dff8086cd069f74c4105963a1c63415289b1ecf73a22d4788e07fc84a1a2
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..973ee858e7a9db49278f82e15cddef603cf16716
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:116450f9b29f0c92744ae3186eff66c1625cb0e13e46b8da386922a29e3bb52e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..61d9cfdd367823577483ec665abe3e010e5fa148
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a34be6d7ff0aaa819506b6f5724dc921bfb80cd55115662d8936579cdc05548
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..86be23078e4786a47731406e2647eb87e579ab53
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c19a700b21406d86f336bc086b2d03dd94d1c47b0b634ed97d580c27ffc15775
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..96846b9afebadb5f1a2ed24d48cdbbea9f87a888
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84b2e45c43d8b2b4cdbcdde19cb818d13665a688dacd1694ef67aee6456bc9e4
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..543af0a9590378e20481879a2e7247596d3819c1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4edc197fe227aa32e7932f6d59b715bcd4159ab61146b3bf169d92c92d93200
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c8bbcedd6e5b7cf4b2acb346f8a4c9b451a20cd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92c3445fabf128fc2a54a422d18ef5331da565b32889ca959746b1f282b39205
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c62fed39726ba56b706a57e76444d0c674d5868a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48a5d94fe0afaf675c73e3de2e0f8527d5d79779b3eb8e7b7adb6e7059c45c7a
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f249a752788b143002a40165858ef54ca7671e23
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e828516ce62d54880ad9b7555e6383f9b6edc8b058fcfe80e25a8511f3ca07ff
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6982b48ff20bae8a353983c6e2a68cda8c99cd89
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:240b538d4260f58bc114eac1585152c78c483b0f6bab03bfdabbb4bb9b814edd
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d93cb01585e9b089f33b64e8f52710c33485afd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28923dae1a27fbfa5f54076dfa3b480f613a0fa971f2f03db088be793229703a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de1eaea47bedfaf63b811f799801529977d91430
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a64a30b5a4692d6bdb9386f84c2bf6f9b08c7888882bcad2832e9af5c1443d0
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25d2a15044b096751a4e9d1fc745d34dd7f0ecab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:839fa922e17cec60bf5ddc2f7e11848cb814be5e3d8af4a70db63ad87f9ffb8c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c4bc836c722643b9721956d61f305cf3b83d5aa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e11b80b96f38b8859a1eced1eee2fe47c95fe064bee4634e9f8b133bfdca33ae
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..343e059cae0bd1ab20b538451a56b1d5c007a1b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4770f8f612848b2711754402ae4de4c205d1512add755f52afcc783ac86bd707
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c1f3c3514afa0d06a9f7c32b85cad2a190e04f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcc613e3f989530c1ebd7c75d422c67c061d7b4e4bfd24c412b86eab386efc43
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..609ad6baacb09d779eb86add0e3c286065575606
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:375d8596169de389449a1297ed01f5d5ac92c748c19f33517f5b5f4d784d6b8f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b97ff660688cbee99231c83939bcdf62b40bc5b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e59fd680cad965839f0f09d3b65da46ad6a08f28e573a155b5c4634168377c7
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c54352cd983507b198f3e699709fea0afb1ae3f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f1707c5791f81b4976ed690c59cf8cbd872c19c9015caaf4c651e978a668b02
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ac967ec0aa616b3f54774baf8e7674cc0b195de0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:715bad3de31fbdd138b70ff72ee52aeb04601ee0c768e1fcc17e7543b064299b
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c9ff37054b6559f73d0d9138e9d4e7c8026160bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3ba43134e1e99675c5b4febc5b146195691b232e6917216483987d49d101b1c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..505a3c98cb101c897e2dfd2218bf66005b47b1ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c69994a73dfcd42a578ecb75b8515d72932527773e8dfaa6e03a7d3c8862ee6
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..259ea9e1e4636c8ca0991474ee0fce7858c47a00
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7af0a051c4cb99e804d130951062ade2f1c69a4c5bef7e428de05598a87e571e
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74dd4ad51a421c072a7da513376107ea2883468a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92a2b2eede525ae3a8c32bab961386b7f0af2ad52d48e3e44ef7575fdf4f14e3
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ad7d80ae31584c123d75e4e6dc602fcf9541c03
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db2711e96f5ad3ff2c555fe3e8da305fccf5ae2fdab84d4b1fe4d5ceabc284f6
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e634537c8476cb8a9e1ee31a5192af5d79c3cfe6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cf9bce4ebb7f853685c8d9a3989c2e3e3609da3a817ce04ba695547496560fc
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7c8abf603dbbb5429fd0cafdd56fe132f8123586
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6029fe9c6457ff91fdad6d6ecb35ec0b5f39228e84dc8d1f79d9f91ef9331419
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15ed8e01e658870f89b0386ca611f8e38e271749
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58f7ae5eae9913d0ce2725eed26ae1517fe1f8787b6fb66920db441077b0cebb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..81f709a88f6b7a37e6eb618cf1ccd5dd8baf0ae4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20f214f8a88297faaf2ef4bba713661894964b37d10b7f9eff3283694aaebb83
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b03e5742e71b9752b7333f616cbfff9c304b0cd6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa42b620df0d2ef40a9760408b5f9815c6ff6f7617c3f9bf26324ba508d803ac
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58e619422ab787cd36a0adebcccc55a99b77a55a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca5bc0a6aa38971a7467b9d353bcab3a61964bea814d8686a9ef0471196decb3
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99955a67cf4836c4a4c17f7c2cb602391ec947d5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fd880f9250d01bbee8a7a045ceadc293feec9e9c4bb506c984f71bc8665b9d2
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a18e82589aa897e2fac353b7ada11ea62a01d0e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f477107c4a78f6631abe3cb0197a50f1a7da816f1504b983dea6f7825dc9765
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a40f12d4a45345d519ab87efc863d484672a41d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f8c9573577a032b7967edacf8077766376fe9df9ef5401e37e3a49251d338d9
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a8d9aa9c943127cab2204ccf74d86524b2657e42
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b819e669df928994a360f0b1578ddb9a28d56053ffa108d43a4a79c28fb43a2c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d59f40dd1befaf017bfb131c10f68f897479b300
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:030659afa8e23882ae036401518e3244aa148a9ed2dcd8c750721dc9c8cb17fe
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..446fb9180bd2dbb84ad6df51a1c5d7481f307c8d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b1f2350e0df54d2774233867a8dfcac942ccddf8d7f74bd5ced9c6aca3e1eac
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.12.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b73a768129addab299202014af25b3784ab70063
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abe78697fa2cbeb007318f4993f537eefb9eb2964aa3f30e80d8b15609f95e5d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.13.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..938ed08eca46f901a624d4e53c40d39490caea11
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bfc2343b73b3e37d7c3f1506314cae71628f24bf84812ea303ab3d3fe0ccaa2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.13.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.13.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..596c3116e42376fd8d86fcf0f7e162855045b88c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00c4ebafad75d34814aeeb72faf52cdee5139c2cc1a9917ebad94d02151a96bc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.13.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.13.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d15065b8679e06f5aed26fa72b06ed45b15e68b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d1ad138c19540dc908aba63e36031e2c39415d224b7305a818f62c5f764bbcb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe322cda4c71052ef2f5dfc362946378a63c7b76
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27de979c9d6dd23e70bc945bbedcd32a043b7bc22426f80c23d6ba8e0b5231e8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..21d34e2fced41644a8220b067f7a084f81628dba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e18158b4471d81575457e729f3d3267a40fb6a4a2e46fe6efaa806c7294cf777
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aaa2e76c82035c6f1ceca88b28ad405abeab6f4e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:070a103033227e5a1b1f77a1137c2c7d2cef3fbdbe624e4429b6312315f19903
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..83f55f3ac7dc8bf3c2950aeb1d6a4b2c42300565
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e62f2f7bc7293b58926dbcd69fe5d4b4966fd0ad3fb4b596f2e6444f82d914d4
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b7b935c6f6b55763f0ce2a903f3d03b2b79a5040
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e5b8fad557a125469edb065c27d41e907061ff2f1268ac6f26d3b4166365d9b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..40254ea1b2641c5d3f821cff99b508fb0b7d4423
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cf2f183a669eaf33d34e40be58a1f8c1c80b3742ae63708602cd566673f1ffc
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ddcd4932b82a22e4be42fa577578f9aaf6cf801
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:163fec8c8fa6d3d1ff78751c9dd984be3379090059cfcf3e7413b65283b17f83
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..97799ae9121d8ca52bc779b5a7e94aad177106f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e3b3f5e9e4f1835ef204bbf49f070382d9a6cd2e43c0c9332944a62678c6d86
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..40bfe4c0fdce596346f9dc8a9baefc54a8dd839b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:569a8c18ad9ff560df20477f8417a4f7d590a4f78b98391b62b8a1f3c2845989
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a5d896b68ee4076769b943121517cf3421da7644
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a9ae315640751de19c3d6b07f012bebb247abc58d488cedea9e3f4d2c323eaa
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..61a08424171004cffa340925251410adfd016f61
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:299e7f71ed937796eae515d35efa25314cc0d76e5fba7b73a4aafa338ff34e4e
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8bc7137665d6992850e99074dfda1e6af1992d5a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d5faa942b2850393a257e7c440463bc839be0bb80cc22846facc2dd552773d5
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dcaa5c63613b6a3f3725847bf5d719d063b98bee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16cd9d77a9b08d87c42ed47194bca62a9e76b51edae6bc6cf59cf13061414c18
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f264dba14f1708601cdc4daf8820afec0f8b46f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fe6eeb2c033efcf1597bbfabb9a3946cbb7fd260c970fd853b408d848b1dbf4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c26dc76993d7c87d47f73b2fe2b4595de077498
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77cf25a649fdd37c1d8aaf9d1113b70ab3a08bc4e268ca44234c95339ea3de56
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8726f5f8dcdd6cec0c87eae2e2a82f3e3160b40e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b3b9fe9d7a3b6fbd1d69416250ada903240a55e9fb039980abc26e6edefbec2
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..730da6282408b5323c17cfe10df16cf7b76871a5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4daaeef1785bdeb5fa2c6acbbb056f9b3e8bb78a03e04e80b027ece239b0ab4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1ba8dab6ec7a640c56ee98e7722efe137494b76
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ed6c2305c5756725e937df8b96202afa7ab49d29e3ab46ce47458bb3f519526
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..48d985179144c25cd849494132ca9391976a60b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e35378a1ca5dfd38587157ecfab86b3f988aacee61a24367dcf580d984d0581
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..502af7ed379bd68c82b8d04943d6fd7f923575d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:501409acf7473626ce8b35154573d4a6ed5188c6878ae72355810b57431e28f7
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6e2accbe2cd1cffba82a54b863a7cce804b6a9c4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:608ff2e3e79d1f23110c73f8cae3ede700ceba33f50c28984eee919994fa98b2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1c0e5c84c52fe385f97c45c547d049fe750ac38
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2eddc9f02204bb12285f1c4e677b87ea2c0147e71a933892e1e8457f4068b66b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d99000373f022f8c910a8efc96c184c8d2ef62a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:767441f01a12c0c6f0a6f874ab42f79451aee894fb25bb85dfe72a59a2e1a996
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d7b98fd5cb2f5f58520bfa6ec8baec1da5d6deb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc22f1a63d87e7826fb91b93241c8df1fb93a7ef9db9007041f427a6ff1b3b17
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f9c6cd0c663698d873d6d327cae139fbf9a07ba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2331440b8129fd7a82d81973c598dceba08eb0299a2e13e522b38ad1ee613821
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9fc9fee8a4b7c10e4f65ae20343724b9837718d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eebaed0983aa39890a3ac30c7d2fdb6143fecf300b206365619885303fffac8b
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c652e7ad9454072eee927f76125e35ebdc887671
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7df47a3caf48935670f4be9ee9db6923a9fcc027cde42967a0e48235c71b42c
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1886228132dd64241b570a04605a659f343654f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14e98f8f1c2f7df929bffa8609f5dc85362def62f3a0b413c7caf934d06d6fa5
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ad810991f217b40b49c92e22c67ccdef204d64f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86b7b7c05631422783f93f746c7fba6f2e29cbb733f25f98f978f9c972662372
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59f04b19e7d9d20bbf23384cc395b7b727b1f383
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12f37a471e495ec1ec294864467237b149dc934815114ec38f8c62c871058fe4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..df845bc31fc1685e768b0535bb06b87e15a1a351
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09bbe0702e9134b194058471da06a359c49563627f0a14700e0a0c90dee85443
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4ccc710c2ad3e12855147d316fd20cfc726de20
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93017fa145149cf01ddddd1078edfb3a07989f890cc5e02fe2f0e6a579ad67d3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..493b29c2a71b77dd850f9f9a45b874bcb8194906
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d88ba63b710d5b6b5debf38fa6443b65a64b4c4822066b89ae5316c1dc9a9c1
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f69e0b15e055b542a6833e273e47fa0c4d4f5022
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5795bb59415b42be5aeec711f932427d0f1395ec1cfef703e870dce045042574
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e8357fe8555ba37754ba0a8dae6a5b4ddcb7a05
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34dec1a5c7ac164e3777c0f435c5881ec1a9ac3052042ae846912dd245fc8f2d
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..764dfe71f8d77afdc7c94b92e58e5e66524e57a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68eaa3ee21eec8ee79ca850535c8805b0ff0329b04608994b34f9765a100b7dc
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a2195f333b5d6cac914b75c4f809eb9aa507ffa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72e7ff234469895a2417ba8f1554dce3a34f948e7c59f9af79e506c7434a93d9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3f15188572d6de576e15d494393ad34c9837b76
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef2c8be617251a84113f89b72fee19dcbcaf87f0d0c72b1a9c10f4c3360ca657
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2801d6ca33c7a6b3afe03a482ed167b89b56689f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a8f3bfe745f270df20a32120a28492f6d13f279aa18599a0736ad4a176f0089
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..32afa5785b1e8f93f1babb873eda778d6ed65b9a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b5e8291ba836725f980828c8f99896fe1bb0b4ab0111b77c210e3de5b050b18
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aabb12c21174959fbf65f4aec76baabbcb3912b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d960415d7607e9174b7009000cc2a2548c658142b488fc6adce0f48ecbb82a8c
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..729590396c90b637c7417376d78c5d3acb630300
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fe64fb858ab4b3e989c463db3f1eb72f3a5a1af7f7a4549a0f270f2efd24995
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0df4c6fab66e8127eac8806655d98b0ae6462218
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb1884d5835b48c42757c238537e0250b94296b60e6f1f8c26c4703d88916be8
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..32bf89eb118a307d0ca03d04ea925134f8345188
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:905e16dace9295e3a3a10160608c06cf1d6e25244f2808a73332b7896f603a0a
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8536766cf9547527d1974fef04b1694652799835
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a07e04b1a7e8dd40a1326c37df0019158aefaf20c512c209971b499a141c061
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb92c070d4056a79ad5d31332010d10889c08ccf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c968fb2c8adf9bd337edd3f172fad12e0fc09900cbff427d66ab76b87364a15
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b47736e0c00813bf51af4ec5675338aa6b05f5c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d37f924d8ecf9ff6ac9ceb7872a0ecaa907a915055e4273aa3e92493fc73fdf9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..707e6730b0efb8a9422f310bbbeb0ad003d7273f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9bc9d9b1ba8fa6f3197eb37d2bbc780f4016af8c7b2e2de807a7ce6fa8bc19b
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a33c15e1b15c2366f2fdb092a3b499257331ede6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f1078e277d6b864598e4b69e3bb1b88a83fd154b349a37e31ff9f42759eda65
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b4e9d8e559bc30107cf6d484e5b10b1f48ffd211
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2ebe0a36575cb7c7f9a8fa486411ab7b764540e2f72d81040a95f6482421e62
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6e2e2f8c1b3bf4da73ee6cf0afb362593368c75
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fc5ec6cd0b75165bfe5a7448d6c0be27376cb381da831bd4e82019e8c0d85c2
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..905297320629566b543925b2884837f39efa2d8e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7a4accf0ed143b794bf632f57bfcb6d9f329cd1bb9689d996b49123b1769c09
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fbae1776ec1b5847f2883440432444193e0e8867
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77cc4372b54a37b8d5d04817e98c39cf815e81ca14606f0d10c2d4d9b95baace
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..edcdc65b222b6b1224ac6d18277e9dcc6d8acd6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02815b3396f261799c9941deaeae8ad8ec942c8bb8da814b89f3f65fa8f4ec14
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e71370c0315c3ae9bcbafa30f18ea68e5b70564
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f1e556459e44ee5171081af9ae4c5cf04ab3c2e69990a060d3787581292e0ba
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d288f23221b94f9430800fb1184bca35bc1d032c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba3d3937ca91f8a911eeae8f0bb4b8e1ae8f09ca9841b78af919e5673dd42823
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.13.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a94292497557fdee69a0d5f0ed05d25a811fa01
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:419559b4cb4b6766bfec16053148e022da68bac6e9f9402eea027f5498148e8a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.14.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c662106d210e062e47cee70a4b777536f1b4eef7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dad2b886531952bcbf113ef144b2eb74f62befae08c6eceb6a04e650ff319ea2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.14.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.14.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b226897accebf5dbd59d8288b57619931c319b00
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8117148f7521f97e4fd9974bedbc7d43e11d147ca1c3872bb48203e772f5457
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.14.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.14.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f04288b88714ae446a84359908fc8c43fcf0d228
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1b4cbb9dd947fc757f1112347c69040677eece9fdc6cba419b5f3b0550dc042
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..77d5aff2762d35512a5bdf47053cbe72bda34280
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a10de73d313724bf43c555ea5feec68784ca29ea75eaf2de6b43700f2a197108
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8344ac4b0ea89be772db74071a276d3654fe7e6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef1711193e395b7f198e79ea2553db0a4217352df16887d5e5845a256a70cf0c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9444517bc9baae6033f2a611899786dadd592bd8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ed5a0aa3a4be77f7aec9f28c272deffc80c3197bd43b6dd49a36fe4c89cf25b
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d584c93650feae7ed3eec864d1ce7983c4d56d6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bb5433172d877bc517b3358aaef07d97eade0cad4c955d98ea375a7afeafd31
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dc954b6345f815fc82a15ef781db07cd69cbc372
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7455a92a79157672ebcf92e0e7b4edc42757683f76e66a7b9ae323237fe6186
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..961d624d0c32a73bcf640d075d11979c8acbe3a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0520a6a7fdad37b7f0fd1e49d6fefa1b624c2d483909f98835346ccb7c8e3dd6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a72e4c0bdb2690b42ab7be6e21e7079e35dd30f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6b13f80a8abeba75ae67066fcf80d7cb4e21a20296d51d1a89c94130f9c435b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1076dbc25c2622f88beb0c6b67915354f32cd9f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a88454ed1f3d88261d454d7c99132e0b719108da9e0fb60e5718f8f5b1fcc104
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f8f069a420ff247da7ace4b439a2a7373a13d9b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d18fd7b8ef3125e6e9856bf79d0fa9469d62c68ffe0c61613908cfd4d661120
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..204347989970ef346eef869345d2fdaea20b82b7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5738646660920f0893529bacce486746f23b551ad04e83896c2102e6a39e779
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bdf4c71ed07f7663721330c4e03a70d1592a4a4a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a875886e41851307553623a74a1e2d2d7d2b450ba9b18129986e6eb3317cfb4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..081412ace3fc55e3821272b3c69fdf2f33418ad3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6bf86830ea76619275ef31768f113b40d5ae491757e55e393ec6c9c9f7ec46f
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4706a6739844b27c801f2760cf94892476b8e0e8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:627e5aab967f78bda93fee9f1ebb391f6c9d4b9d6f7618ebf33d70fb325136dd
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..38b425504f1cd4560c8131de9a193b2d60667cd7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d299aca9c242af167a2ca9092bb18c74f61ea05946a7254b12219d2e105faf6
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..322f454dbdd483b414eecfcc9d4136fb79f3dab7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8923af9219bf448f0f9ff2c6c1cca183fd63c76d73c2d2acc03a6000efd06ed2
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f6a60c86ff22b5aeba7ab54ad2ff0d22089c5aa6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af8e75ef158a7df4b62c32acc0952f2f8943b86f93f83121b89574cfc3a98c0d
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7ea92a4955060573ec73779a261921d5cce9f418
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69343f71c395c18b856b9f07d338c13f6b2d7193161ff99b15481e33c84f6e07
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb7969a1108e4e01a248aadbefab4c39a1af8518
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d17a776ef4c85603c78be452248836ce0ade5a643fc26270a6dc42ba009bdf4
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d03c0c7a737201cbaf47b7d34dae7e47157f07b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2998571acc2de42e2295dde55880dce61377902fb371cf6e90bded8234b558b
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff284f825cfb0f8a03ce323067e730647b6c4517
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51b03146d5007e0e90a4817215eb6f5aef8998df697c4dc411fa60a49585d74b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b3e45e0f01e2753790644ff00bea645cd9426145
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d2248cdde693a00d55bf29006ca84e3eda23003196b170f3f40ad82406e5398
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4cd8283c6827b19f89a953c5794a5ee673dbb33
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2af24db60fdfdcc010c91134365220bc880ed0bcf0079c703e0d66309403e6b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc5cb572ca5014e078f8d400931f44e36b925ea9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c99726e2a697d90b78f1f6f5575adb4a456d4ccacfabe64b96d759e30d3d1495
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c70e00e11f3cd39f90435f006861c2a08b77b374
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b38d4bb1f8856b0459041279493c8acc5da01c119ac022be2cc4e0f68e152c63
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..430a56f2b8a3b009ee353fe8a8e3897e961afe2a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3629f4922e8ee6c10d6a918b252099a613cd3d2a8a0359625b608de929e40bb
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2ca38b150165100361ef19e5fc0b28042f8f9bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b649440619295ff4f62eb45c96e0fc5fd063aae2d5301e91ec271c7c510f647
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc1654e7565de45e798d1e51b3ea164c391c919b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:936879646082f71362a0f2efd54e6becdb5480cfd7062744b6e8b11f91a0e205
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d95180b8b519d235de642ae0cdb6dde4f662a430
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b41d5150325d5652e0b4da4943257b77af25229b96f616c07667079d3f077cb
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04daedd6a980505c7b36afbf3070fba47367a365
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c5dd024ca259f3dcc9004e59fc9ef74562c0596484e191fc2e0662e120fa4f7
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc46124dd60fa8cd3abc09aea42548a69c6438ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08c6bbffa83d49e93aedf62efc7858d0943509d8171b4fecfbfe33b1fe16acef
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f5b2ef2628978919442d8b5c1e94030c4f21854a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4cba524c4f7658bf974669d5a7c4e59173ae9c172249e8d165a4f2e2c4d4750
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ccbbbf412dc6fea4638df9b398775219254a14a1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32ed0fc8cd81b73d239419a72ea9db619bb6148f320398572e3380df081f5ef3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f1979eda29fd7c8aafb2406a542e80912c751dbd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f2f7c6a3deac45ddcfff30f1061cf1a9a343222d29318442c27f5e139a070b5
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e8dda45104de366e42f0b56d7d8f6c9bf24fff24
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbb0f9d542407a6bae100b8dbe891b989eb84b24df2c184c3b029d5dab331216
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7da86f013d3c47496bbdb6a7aa64e9ed4ecf6535
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55b7f77d6fd4e1a50a0cc7a0b4f536e503971452d47a462fefb8359adbe94fd6
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a567a6c4295090e0a99d804b103ffc3c6633c232
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec3ee0594ea8b5546cd518283ecd259b9a81eee6dba4898218c918eda61ae781
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b7856d1ad2e275894d68b5763b0676e78a340324
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af5b359da075ee372d73bacf5d850e60996ac15bdca79c522aab7d9f93f1be52
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd6992b86a92c6f2b3240f920a19156b252391f5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a73d684a09f1c684e9353d86886ed0e6d6cb3c30938b8d3a45a1368ae35c9f60
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dba276a48c1ec06dab17f896a14cbaf3de4b37bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21bbe90dc2acf605653b384118e0d4c8d99a16f45ca39ce36a870c2224fea865
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d953c7f3f1e6144967828970a2418ffb627526b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11087bce20b27c62c9ffdf516e8c36d5e0eacc3f4fce295e6d32b038649b45ee
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cc8fd238281e5d8cfc508956bd515d3ec10ade6d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c8906aaad213f7b9c17f4aaa7be057e7863b839c1ec827caa1b9d8600c6f42b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..19aef26906dfd51d93b5a8ed6c66f7fe09d4d034
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdf33437f119402d7b3fd0aa93aeb34e724a51f0f076ed34fe7b6a93cdb1462d
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..986e05029226dac3e1fcd6927bada80d29be2864
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48d1047398dae2eead556bce6ed1243fab24c6dcb6c93000bc1540d43869f0cb
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c490981ad38ddf2676f3be949047f8b100fbc216
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:485db335527f1186e2d7700c104ef6946cf00f9fd1ef0e8e305ff79a62669631
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c0d6dde9aa2b848de0eef9e54eb735c94e1c4ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fd0ac03a3571bc851f0a454670bc3ade818991c312c6a3e7f64b0745aaa857a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7be5dfa6087c7a208f5133b1dc3c788c6651c869
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a087c67559503344f3be066f6a150c1fc698c8eb6b9277e9ab855fde39c6baa0
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3cda3832555d3bc582610e1bc6b841c855914ff1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9381c9422a0463604c61c57285e1a46b2131fa76fe9c01679817105b711d2a71
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f1a96798ec020e0602592cf43c62465b38b29a5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:340668e60bdf815f0dc8b506966ed876cbbd9fc14dda732537ef6a98ab9d7a7c
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..31e5ddc56515646b2bbc31e5e238a7a35a5c4525
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b24a7b3a1fd99a7d9665f5f3f8bcf187ba981d9322fd3897774cff148d58b216
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8bff9a078e633d413d125954cec6422b2e748f11
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04353d8b93daa9c648032b623e82fa252c65ade0f30334ff915c312fd5ac1afd
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..858a3e29f59748d9893b7a73fc0f0c1e739baa77
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:328ca7b52abe04742b3e88cf6af4bd5af9db9772b1f31099081166fd29aab23b
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..249f8fb375eef11580edd9e923c10b06f44a8560
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b83f0959de755337d10bde7166254af4ac7731d4efbf462dcf164603aae572f
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f478378368b554c6a89ad2dbb9c19f953c944fe2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fb5d97abb5fb90c5851f53740c91845c90351004dde57f1347f52dde32bab28
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a4d9bc7e2897225a0ac2599366684c275a01fe56
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae6ad0dc21fb2d545d6752e4869355ac6b589ec76f66e6495ac76abdd0508501
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..94dd6747c1fbd31615ad02ede5872ebf8cb565f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54280cfa63f28d665d0b53fd7d5ba6aeb3776b61a03afbf32b60d06d39187965
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2df7b4aa68a63b241c7517fdc2f0011fd484351d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:059aaa3cf997b959db675a43a246b6159303578e2d5991e654888ba586a4e8fd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.14.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d658369f0e76f886029fa68ce0a1797ad7750a7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b818f77dae5e09e462ca9e5a7a6ac688186dc13d14da9c5dd71beb564ea701d7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.15.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6333fada896bd701f0104745c4ee34f2db956a73
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f9613727315886c573ccfa9ea619481eb4c0859069784a9fd3bf4b7d0940acc
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.15.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.15.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70a363cbfaf48193848db91f4a6f49630016ded9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ec43290c6de1a792d9c094b6f0db27c2374a4c9946a0478277a3b1e838e4b44
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.15.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.15.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ae3142b73d3f0433ddeebbaa02afc19be800c848
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7ed16cd695d91fe9740db6569077a3dbf2c50793264757ab8609ada2cd42aa7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d99485d1a468d9ebdb2435f54ae5327a91f56109
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05c291ab4edfc84648ed62a521d79760c09553fca9c507d7dbc5556cd01bc06c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0414ee7726b708e16c1b49aeeed7118640a89370
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6805a21dda14e421d27d2ba07bca275aaa934d6fdfe4acb2da541ed1977db4a5
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dba8efff63d2d1fb0147037d7acd92ae3ddffc21
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a7365247d56789bac9476b593fe79bda27d57d5081f20a0ef2730d53c42608d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20297df018a76d25ca420d7a298c4a3f9e108954
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a28f4847de641b48c0037bdd3ccf0417de7c46c536e70de911d7d5aa044ff693
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5b17ec07ea0e0407e56f5e74c0109215ea77dc60
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0aa8b1815b5d2428dc8596eb437e5637fae68d1385b3e967182fb2af6cccb605
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bcbfad0c64463847600795a9be397603e5dbbb1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4b6eb61b5abf9936bf78be7be34a5e62b98d010eea0c82449f25a946f76351d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c10a150ecef44982503d0eff18dcced7df65005
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8389d410d46c4b9ef5493b5c9343667ffe1009e1643bcb6c08bab4f6ec19643
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e07c171450b502529ee3e8a62d9a7e28598fffbb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3d78851859c1021a25b8771deb35bbf8ed502a4158b0396ca3aff89169f7e95
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d3d748ab13755626affeab23d5d94ce34ba6597
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66a602fc67824324eb0848042f2bc054aecfda33921486df6319ccdbc0c02e6b
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc683839034d0f3ebbb5d24ea8e64da2ba36e684
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a589066bae38e087b965be32e89853de1b8100a3b047554d006a0eab6f93e3ff
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..723a0c356cfd547c86fe08c4f1c482ce9176d0c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a21dc54e5f4f5112ad3b6136b0cbcf2d51cc96225d9e41e9e17df0b0882043c5
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7bffca30ddda80218cc1743ad980e4f2e35fac2e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44453bff783528ff26a3afcbd0cecb17591061e950c6ad7ed72bce51c8a55d33
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f13687ffaa602c015949bb0982c0524cbf70ca9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4071e8e720d482329d8ae6c732d869bdbbd77ad91bdc0071744d1387decbc5a7
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..13e5a0975398f336ffb7e48d205ce86cc388aa51
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a586acf5571ee8a3da2d1b07875078d52d4223e8ba9b234a946d0405e414aa14
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..06a956bafd2ef22d4c7205140c4139b40048f20c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b45981850720c3dd3bc3a79a9dc91da36d1c30b8c881d7053b03564a59b4ab9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..79b2ec56d446752df5b23346e0398dd3b7544c0c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1c6f62c0268278696d800536996a988a73dd7f5cb5e0a7a306f843c66a21d25
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e17ad94ce472debfb2ea0ab6fa83c3d73de9bbad
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7387502c2767dcc5e574c3ac0248ec6dc0a37f188776d1b499223ebb6a2ad2e
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02f0c9cfdfd6765b151dd5d2be80920d30200b5d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19dcf17cefaa94e652e5d2d308b210cade7fab2ddd47654175ecc90ab4db8f56
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bde5aa9ff84a19008fe77e9f6bfdc144bc3caa71
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8edc8cb623e6501b06df885fe71171b2733640aaf12762e0b62b3dd16e09a77
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2c955fca18def00c47c6c2dee0932478e373ad0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46c64c22129f5626b6bd53cb56c517301289570ad0436989e466bb7ecad17379
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..79be7d1e38b63eb60277dbe21af8f5852f5189d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4e6d3c7fe6e3800156b33613b6fbc5ab13e13a4ff9166ad12b6722bd8d6e06a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..edd9f346c5a5c6cf2d0798176d31666fb804a55b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d1d6ff6be0f84624d3ee61010277f45fb5f46b8e0f8bbaddc6444d905ac4e9a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c8fb56c695afb86e9655aba3075457932c824cf3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cedefec776895db897a873f98ff4f5710e9e99b146ee37232f1bd23e1897489
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..403789cd9755f4603f85c395934701dc032ba372
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:207d7433466a8bff58e4da3b2f57c36e3dea813f79471e4eafa3c56edbbea4c3
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..87c6d1a30746228a4cbe4d9673f6a5667740ff87
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6e0b34876e27712e6ceac462f0c92cee83509e22e7e6d7076aa4e11f6bee02f
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..421974c251852a62c0a45fac569570a76a49cb43
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38d37c13f9711b17862692208b1ed10fe2b636204148e1bd3c6c9b0b6b41e34e
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..524833569f75962d6034c3075d6530ae6a438c22
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b20f934fd217f6dd013d17251509a0ca061dfc7e1a136f2bf4a93271d9c1798
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c41f240e2d1841e413adc9c2f5630fe156924be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bfb903a14582203ce3572dcacedd3c87a55921d5b68344a5444dadbccc1ca1e
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..873e138ff034dafd03dc1fc7744b73bfce3a7d71
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28b4b5ff02d865f34fe0d97d1966bcada60dca443120bd20169cab4c228eea75
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3cb88b5b0b9d68279499228e1e094711de908f6d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a528cc8ff03ea6888800071296f8ea8a7c0598a8c88963d79892a2b895e95a8
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8fbb2602412c868e824165bb84f501efaac96e3c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d1047d8f5d5a09a77e4ca79746434fb1ba5a26df9fc4bb110385338c2fcb8d0
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..681ac0c3c8ae468cf55a7e49e2d6f35202286c10
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49b45e7a09aed733cd6cc706668393697be9a30ad8918add54e34bfb3f90601d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6aa14a52c8c2485711cbe5fa89780753ca52502c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f0151032f4f77995bf10b5c32c82cb91a2e1e67dedc7c0fe510347ade350ec5
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8cb16ae6227562d1eaee5cb39b9e1f92d643ccb3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:052afd68d9aa8edb83cb2ff1337cbcb3729dbe9f08f3a01703323373d9d3d7af
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ddfe7cbd4a5e657bef65629fc07d3e300d02e70
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:161e1f9fe9301df649071bb98d2bda8aaa5baf715ac25b2b69e0f873f3c99460
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..495e320a9019a15312946aac95ee8eb6a5d9a314
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb5fbe018540c7d04e7b48287cc7abaa51582b89d21083ef168739c3227926a6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..540e2f5a91cc8488f33bbb045274b0379aac5029
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb85c4a73903afd0a01a9da01e4dfee469e70def4101c85cff9daf0a401a5056
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe72fa1d92312aa47e8ab6f7afa708b3d28aea3c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1ce648dd3019bd3495f890f2d024c605e220f2128ca2f7f4dfa7441943e8ec7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dbc0fac49585eedf4e6e8c3706cb3f2de8b60de9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bf6d6b8717728dc95a92de2cb255c458db427db39b3df495932411642143ec4
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b468a7f04795b381c5548e40c4a34dd5cebc533
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a780c8accf3ac2cc85ade3d2d5db48476882c25d4cac182481cde6c60aee537b
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f9e7a3c8069c92669ba5b79690712e059876694
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b20e6606143e9b4cbd3ee2740912e505cd191ec5cde1243f351ed9447380a52
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..531f819e1bfc5a304d4efbcd9cec447f9ab76b4a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f627a1861b79525348190b2af1b20abec322b828dcd6f8cdf12bc4e521fbd05
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ace988cf79f0e07ebe069932c60792b312b5d10e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f6215b2767e4280bf2684bdeba4a51e34074e58b81f81c97dd1ea9476263efe
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..da6d9868838f7060d0b957d11dc171c5c56ed27f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f37bdf41a1995ab67c5fbfd60645e9bf4ae2b5305359cd64f5142d602e0947c6
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..396a966c053d37d3ec90e29aacd4a631fac224d8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51acf7afa8c0c43748a4123310abfd216465d20c670061f73667d5125a7f177f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1bed1a090e24d97dcc418f940da8fdb1774d89f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48447de5076d8255689c82baa6fe8b56c84f2c552e9b467bf4f36888843c23d5
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1588ec73cd31b94c24ca46d847603cbc2ef172b6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d26e042d76fed22db1b09b204b5935cd4f33c2865e2632f63256f9af115ebb6
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e03c5794ddeb1231d828767eec67fcd43c086461
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dce7fb3a0826f39ce26b45ab46d84344dd8858364a4b98e19fe253974768bee
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c886d9c47da6e5a3edfd15fabc50dbd65c70f411
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d87de2b514bb5dd0891f5410339be4b1e3e653ac3cd3858c972dae4bcbe3e0a3
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..82f2ad15169c2acc33dcbe2cc98c5763d1e9e42d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de96fce85dc21b31785ab7530f466d875bae77e35c3dd02330c56331f8809091
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..527b671e9691259a2d5d4e40cdd8863234a49121
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c787f4f0f3d06e22dbaaa8d52ad2686e7b180e23edc813e757db61a82c15cb8b
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f63d6d4a0728eaeaa77dd5d0ddc7f5b6918a0d8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68687d87a8dae3577488c5d396af05d4fc6b458489beab3cebefcf6f8c4cbdd9
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ef922cc192752aa45a8671f34a39dc7ef048fc52
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f183cb6a646c1b4820a06abc46afa93bc9804b68ef9cb836b10495dd68a13586
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c8bd7ab4ada720522bd9fe059274ebdae3dfabb3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8b97163184f392cf0c69fa299c2aad0536b526e36c0b3d896a4736ba1ab22ce
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e47bb12e826331ff076e031b0123e9ed6b062eb9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5122b3c279042869f27d136d4da45cd01b03ed7fc66f22410ca1d216ba92f326
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7de912002e8354ac0b1ca474c9ffcb6849516ed6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bbc2c2fb9c75aadd51d2983f72472ced54a1f4cb8bb688d9cf83950e6324668
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.15.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f1b43f4c60c36b4b32d210c790933efec4f8e4d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3831d05f8d909ae7b00fc9ff2a04ee893e5fdb95651af811c419a4a29e02f00
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.16.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..98a164a4cc9ff396337d4673610b2ef814cd32a2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e20774583dd21271df21d138c6306ba5a5035889ad764db4ceb68e1cd352cea5
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.16.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.16.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..690117a21fd766cda307d0572c5dfb2fd0fe613c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc10fcbc60f0c26f917b8ffbe4e9019a98554649a9bdb72972d4627a4250231a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.16.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.16.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b253fb43a032e9b08613ae397642d3869d72022
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78ad08b8c35cb78bf068e70fb53f457d7f612e90eb40d0199487a97bc0144728
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04b7ee9ca3b3a7e1106189b35430fc9dc87f5f86
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:620d0b97265d6ff5abde35ba5c4a02cf35a887efd5e9e863372bf621c0540d2e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ffd6b4029f472db127175b5667c7cf2aa7e3908f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c32966de427d166a6e0fc2ecf917724c52a7a6eb74a667430ffc2d13cc943742
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0948afe692d399ed7879a4e9c25452bbf414a685
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b32d5714e5a055e8c051a1094740367319c43941b38d18eca79c8807bdff4eb
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe31de88ae08598b069c028ea5e5c4800feb603a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e84e12623ee645121daa4a30ecec631b6dc77dfbcf05586e5baa943e4a42bea
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8852904f980746b9f0f2ab5db1b4a8cb535cc2c4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bb472459a810735c65c5758dc75e050e3f212603ed23e130eb442bdc5cf5446
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d8b23f363b468013d9e98e21f696f56b05b6757
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdbb6951542fe358810316969696447e0406dfce2bd5e9a28b8df50de4038f6f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c66997fa14ac863b82ff4316163796875972f6e6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5271b1616e8598e623d47e8219004bc299b6bbd3b32a5173f4bf944be45e473
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..32421ae722c237450940912ec5d1f5489b24ef66
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bd9a0623984ef13343e09a1c670f10d4bfeff1556d7c1a067091c92dafe5474
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..386a29481590479f4aa2473f1ed9ebd683800dcc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d16d8357a34479fd1cce0ec2ddaa99476d5de55ca3cf10e75a84ef8a9f60f6cd
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b4b443b5fefaa7025b2f1faebce38c2a9a5f6a6d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b21c0f49c94531ef1c633a6fa468a5dcb19baffd39fa56f85c4d1d22659d683c
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c6554e8ca9f9a921a462cf7bc23c760319868a8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d37501f74e908bad29c5697f351ae8ce17858d88e54edbe052db66a99383089
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bebe0c5d2320b42b513caf70bd9873f3d25d7d61
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d191035b695c27d57be57a478528e528a424a243a5280329224b1f240d9287a6
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9cf463ad690da1b8b2a7571fcd8d923f80f8251d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:587a6263df138f31eb3c3b289e521eb59d259eb6564d1de0fa4d5d297d528c91
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c309c6113042cca58c9d3d87cbf8d64d4a133ad9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbda41e131dbbe2608317a73eab4830e006aac3f7a7616f1cf68308bc1400814
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b45c88c500e7c6d15185576345e7ea5180aecab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5a561fcbaf657bf996c4f6ddff9a22e68d9338cff292dea3d83169250844d8f
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..60c18d8916aa23c6d86e0f497a7329c7df896fda
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d3a3aa59a5668b7ad29c692fa8b5dab3691dd971552d6a461f0b030c604f5c1
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59dc8df9ed829668ea79fb42b21314ce2ee5eabe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5453029e63e6d0d058416342d47433fdc1b07d3c305d3c43603c60ccb15e73fc
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e446895732ec53f0ad7b4970b12bb1f0d9a00b67
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5380bd79a057a7eba5e70fe2a37ecf98bb0187b44bb508e15b04d260cff41172
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed3c7ef783f53105c6a84ef2fe221983b424e1a8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e4d891710d826f152f7ba3fdf30ec1325009e3d8de22c49d176e66ea0eeb8b2
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..912482c1adda2ed9903d4a1c63077846db7490ad
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:251969aa89a2c941f95c8e2184fa21e5cbe832a7dd226f81f4d93b3493cf2284
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9274df6d3696392a470e92128a482fbfba70b5b6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb9a6b847ce161737e0660eb100c855298e0b5c470baab3836ae4e7931c90a9f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d71458c31727e1fe0e7e3df5b9e63e47fa651220
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3a44c175ff3a011c0a91c65e40da973527c1f4979ed00dd5f871ae2ed3275bd
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb383d6ab83b3613c29396c713a6195d7ca72f2f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:318ee4781696120feba74b4ae0b7422b3e5b4d34a06ad6b78ea7eb0418076ecd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..706c012fe3d2b11fa207649b9173882677489236
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89ec2825ec50159a936f415a05249d7617dbaaed5716e445f4cd5afac36795eb
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ada72a360dff39a49d777b4e7f8ecb15cdd9a4c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ec5cd76995203fcb2e1945cb368679d4ff2c3f660f0fbea4e73f3b0e90eb729
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf2c8ebd05c2a30bd71bc249a927887ac649d861
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:511b121d50a1634236130654bbbcdad76c73b565e30594ca5e9ee5f27a498a29
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b744b25772bf6f116fe30ed0628fd6fd0c8261f5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ccd20bfbd6656681bef3bf184d05bcf03a5d2ac7857ba303cd6632df2bd0010
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7199646edb1d881607c9a4ccc6f27b8fe192fb4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34d8eb8f2b62435624dd5275d9e0207694a4248f7d245346d51ea5ab0fa7ff83
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25a512f874f7bd3950e09ec0dd324c0631ec1d2e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21e2617c5e61ad7c4cae55f4229d77f4f021b86e31be6d3d1af5ec7336abc8e5
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c727efb5b786fbb640ec5f2c579046b80c46e6f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64d75ec51c8e1c16f0f4549aa7b5bbec44aedbc7c9109b4391ab0b8044d06212
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..489eda9206e009cc52cdb3085f3a80608801c0f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54a57a791f69ca77c0dcd990d44435dac29a44d2399ce54a7e1c76c7596be6a4
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf1ace6224634fff9458850fbd276563e387a36e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b294c5ae34b212d69e020b7f3101f70f82f65aa0267cd33a893a3aad4d9db82
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec7d7f199878e455d97ea1cf43f4e83b9737d64a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:febbab03e45de480d8b802df2b525cab86ae515ca6361517a7bc45b4373b03a5
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74c2bd38ef804e3fc9a1c3f8937a865bbe2be5f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f17328da0aaf481f2ec7b3b9d6420e8bcfcd00e6f5a30d3f3da6bc1b52fe55e
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..512538c038d46b11757971c2c33cd8849a1df9e4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4914231e172329e9e9543277476ffef73fdb36e9bc97368a6dce127edfa5090d
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e4953887617f714171b4a32e620ed884b3ae2df4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d08d67a916981c807599ab56232ade24f419bf3e5196ae3a92f09ace3c27c9d3
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e43a21610d8164bec68491090e780037674c85a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05ee3960b80d1ab239c89d1ca91fac5bac41aa1ecf7ba2dc1b2787895bcbd833
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d4f2ce7aeed9f7d8fd3f8ca0962459a410669db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6e3e4548a6c32a8585da495c1ff89f7c7254d44c4e9eab029dab522658f6d50
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..230175091c28ce512c29ba01ae4d192b6512fb9a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23becfef6671ec5ea2029e0b5c215b2f44ce391eac85264537bd5d304286c4f0
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee3e6261d6682f6b270275da908f9d370498a8fa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a34133c882951f2bfcb4a336981d608b781d98b3d19183c27a36822f0fb6e4b7
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..47585343a23159d039f60e58b0da3e715bf6a7f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b448df4004623d917e62612bb0447813d5e2e8adef1344c0dd6b384402d3798
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c71ee464be9731a07a7bc5298b02db069f7eb700
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1018f723336e6aae5177913729803b547d423c69c8f5698ca453702f18437281
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c90b504b5bb118023404400df63420a83c4079ed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b07ae049a374923da1dd3e7ac77d83af25296704866e144df46d650aa6de2b8
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b29da4821adcd900f6064bf7d29067a788c4eba0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99d411484bd31096ee70fab28ca92d82ca06119548f8f210ff243388b6fab866
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3aa31dcee7f0871dca313a0742f3d4530667195c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bedddf979d85bfe1a04dfa2c72f5d8d5f77113d580a6146afa0e95fee9f4edf
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0dae299a5abbb038f7d7a9f5ebcf101715f7308
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fab6053a4b2ecc7db01cfbb6d856f73c6e6922411855a5b3152efdb57edb9821
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..90c77cdf8d016cb19e6ec99754abc57a550ca425
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01d6f8a53bc6a0b0e45eacd7ffe760b61090f9fbc15fdc0edbe1b8b00c87d9d0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f7211c182270fe622ced95d299a380d9dae577f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa05df168fdac5aabb2612319a51d2e26709a0c6b4650c30e5b1cbbe3095369c
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f9bde2ba5408bf824aef56b2ba41c17dac0b036
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ded71cc3341e23c326996a9a23685c7589a58217dab4bd0343fc7f8b2fb49e3
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..504a71b0718378abe56cf97f83478316922121b5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f291956b3112e0605ec45cb5eb15d50a71344cd8f7875c57aca25f6e34b897a2
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a68bb619a1ce4f3ad68419fe03f4353d06bedcf0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27085a042e171f6f31c3ff4a5f0ef9c9abbf3e6b958204558366caf06e8cf36c
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a0952c36ae61432ea2e4881aa5cd3cfd46fc8f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fe2feae9b3d93410b5353686ea19c0142ed5864e9631fec80f49e39c2cae930
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb6b816d751553306a9f43774c44f3d8be58cc03
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37770740fe73820922112083b7c06633ce9853e26d7acd967e226c952242a14a
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb70c17a24079c06bf4a480f24bc1a289c5622e7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a1d9210b2eb5a37786de312da6b08475a9a9a35d81487faf3eee35198c8a6c1
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4583dcae69af8d78f596eab23f829601456f86db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a036ef8de2d7dc0df15e2765fb6661f543e592d7190daa81de8bea56c2d99730
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce795faee465c3496d0b86f9a458586e4ae0b65a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:175e7043136f1926d65e613b12000d9534df52eb0c5204567523011677ad2f18
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.16.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9ec2d83f955902b660939bf90fbb636b65496fb0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24d61115cd36ea4a0d1bb2b66ddacc44ad40082372dff440de857bef045f7e21
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.17.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1cbc5647b8a8a1e6d50a5502bf7b34db4afc230
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0e2f5a776bcbbf53339424d2baedce8ab5c6f1b7afa3bdb0d5a0b8e3ce4ea8b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.17.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.17.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b6b9f9b00bed45fc94034c0a46d9bd860eef0d7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:296a3f391fc467786bbaa4f99b2c94cc55cedc984355e7c9e4193c1552bcea83
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.17.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.17.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a077f0457e3e57f45543623bd2b6a44bd225dc17
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eec8ab45c4cf4ebe3edcfcc3349c571ffb7dad0b0702ec06ae8aa5a43657377e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..79daef939aaf5a6b374d1efee9369bab987e9328
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86be8502c3ddeb3a107b64a23e8d11aa96fe21d2b61c8f1a4f727e5c91f82f2d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa4404171d2ec3ff4d2b382f15b65cf8e77479dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abd9c6468a327bbe8373cf04aef3323a8a24410b8a960fc74169d22f5589e90d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4cd6f1a6190cfebd71391217f7f5d39d49652c7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fba364ff2bad24f87f8011ea9a1ac19f7f2a4f1ebe91491a10a504cec803473
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..baa52abe2e810010ac13576d7a50b7d43df071b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e971cd6773394ea47f4e6b1c29f1d03361de081a0c6ca0484e3369021f8cb26
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f854324fb2a063134ddc878b693cf3fdabd67cbe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:703eb874d51772ce297692428b299ff8510e48fd3e083b92ab1dacfe867f76cd
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f562a80ef34bfe52207a7750c9d1a7420f596a6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07e631ff4271b4ceb8cefd74fd586419adb32baffaba2aab2e91335ddc3644ac
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e0d718c4549b878c24ac8a6335e93d1b3c0a1c7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:640ba066b9a6af975f76871077889099d2cb9da61a3ee7228e1ecf070b466896
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..abf6cf185ac14071623e65a445455806b1aeb05f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3772bd27d67424087eac41656bb512b136f6744b1664ddfde91b05f45affadda
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f8c3c27a63b95b4b747ab310d78425f8168ac77
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e35c759afb595f0b52fbae4119dbacf86faf1b42b7a760c4a08e6af3efa498a5
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..713d4c0af2584ddb1ab906ff46cbf488914ff1be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4925be4e33caadf0507178b2a60f1f3a8820385c0702451565bbb6ca896041f
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..786b349553ed21fdf4f4b4d3dca070722e4866d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6165727df2429dc3163b9dd278db18a530a3b891ae72523dd4fef64688de1dc
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b3389d98abc414edd4482f3037dc85fe0e624098
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd5161bc10ca50891973531b8a7e39ddabcb430762b56f9dc3612f9b95ae6416
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d712ee09d04019b32a39c11cb42fd183b1321d89
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b08da3ddc821093024782f3bf7f472de7676f690dbe38a1e16503842e67768a
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b9eeea84cd8f93b31bc08d6b24518ce80a40467
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da2241f76f35e8df3f280cc5dab01cd7ededc518771eb6f1c3e4148d0581d111
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22e53c358f2647adef626fba325ec56a9552d4a6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46d69079415521ea0b6d51d8e764ec2a2ce02676f19c253d568ccc0fa58766d9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d43540747e12d8880081a799af81c1da47909f66
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:877597389ed52108e06bdc0356efca741aec8718eb7339a44946189aaacb532b
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d665e753480285eb4725ac4b27c23bf755ed1c7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecedaf1c71d6be30ca61a3ba0fe26abb7b469218af59946a3291f42298969ec4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37252714a09311df76890c1b0ff3e129538ce129
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acb161a3a30c0a34389c0129e7670c00024a852dfec39a64081fa8137a4db344
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a62dc10bb75d9785a87581ba4a04f298f36346b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a84c3d328e0824f278b6fbcfbdf12daecf7178c0cf7dd2e8cfdcf6c6cc75c7df
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d65358bd399b8ce92b6e6ac758d71b3003bb5376
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a5242a3216938a1b3dd7c64d83439fb606f930338c00f8c878b68f4f504d65a
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..253eefcebd2412b43c420796e3c9c8b7e0814e39
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12cc84adcb24af34e41dec68d1287d1335df279d12db62c7b3f13c93e06d7d40
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50e5bf0e0e102237fb84837e0a46a1f0fd417543
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddce13ca9844fb872ca3ff5b609ddc5cb5dd045be101710f28382c904e34c43b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..899768368254ed4f3a1249a412c4f47388d0327d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcd62124d99deb2cb0db866afdf2d5cc7c19d780e4229320e18e4bbf8ebe4288
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..07d494801b839228f9e53f5050b419c1de00d4ab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07fec851b6697fada58d70189c291760a7ff295ed836a1a83156a20b4a73e2fb
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..69ea51916850028b37d585e35574a8a747e33d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa84b288e2d3a64f07a556efeed498ff2cc2e27d58c4a51819f5acb0125f37af
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d17ad5469d435aa1881ffc66a88f7eecaf63a5f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8c10796430841eced00497ae17908244ce8e9a95177382f2292ef66c1e20fc9
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4164c646aaa4ae197e7f28eb124eb35557d4c10
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75a737d40fe1309789df0bb243eb3b2440c759994ff550011c5a36e573407bcd
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a03a12a937749f4fd73b3c246855f8fc2e4a722
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cef737cda977dcc4f549ea77a33ab9ea9822d9b01f3da45e90321b4d3094e5ef
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a087a16449ca105e4cf1184b020db6bb062a0ac3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6786060d1e17ed5892089c6e4c7fd209c50cd78d0ba49107bb3655e44032f13b
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aabf11f5a6509b77c9ec60512b0867348a3dc8bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:331824fad97f167fdbfa09eb366ea535b6c525fa5550c928283dddf8742526de
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9aafa22f03b6a6235531e8052e9fb52d0161161b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8931ef999028bcce7024817edb740f50f9a90d65ce67a9cd982f011eb6841fff
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..309c2f06b538cc1e36a3c1f7185df7e81689bb90
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c65f211c660548262b7dc1418bce9263686c8b789139e3cec705d6f53461edc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..365480b5a81638c7ce6d838ff477ea49ced4760e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7131de300275baa72900dc9e0f49d649db22862a0c39a640032bf5e1169c7fb
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..143fa0b36a092ab8a327809f7f0b60bf1f7ece97
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe0c5d6072ba1b6ea7632e19ea18f46de7a307418d529d40140d2e383c503add
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba792d3417ed4132a13b818a40e1231f4b9da099
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b74917a1b206fe711b579554cc051acc9c9a510bc6391278b783e5dca4131ed
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9fed7c7bb7613e61cf9867fab0e6c2ebf8eb5bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcf517a6ec6b75d4c96b36928191134d72c0d37875172c7ab4d25951e73d6fb5
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd36bd278f7f5d42d1327832049711e762dfa344
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fb5f2741bf6defddfbd19721c1cc5cb61b868be1946fe29065357eba35f66aa
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f65aa0706c63b60c20bdfcb7b41ef82dc0878df1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dfd0f6f6824bb50d5c5b7d4cdcd818e42d81d9307cc2d3202227ab4677ce118
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..069d5630d448756983864e5276cb436ea68cfefc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7314806f41875e5988070e668c30c32f8e4b826ce2d18fd2d97c20c42e19dd92
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa4e0eb9aae4b5aa6dc2f929da3d3e425f3bb2d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42857831105555eca76de563025d8ccccb3955972fcbfbb130410fd9d6f9fde3
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab2cb33231488e622d5884fab8e7a3527c3fbfcf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a162f2e8cf94ed8d58c9f501c308d79c7f30732fb9baa65506713d7a0b37dabb
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c8f1e5d1925268bb7480d2e3795ffbfca39f181
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d194c1e9f66bd00b35496c0c6d96bc8764f052b8aed3e5f558e01cf9bc9c87a
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..76fd62df83011af811effbc2ba2c3d262c453b5a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:414573d5d23767a9647fbc0b67c4fe23a3969d6d436b539744f3b8fcc73e5985
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a87d5ceebc0e65b2d9dd8416040535248795828
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a89d52b102e4b5a49b1a287511199ccdbda255ebb2756a09ef935110a8a7bbf
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5b602b0950aa82d46077f4b6c4359e002f4cb78c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24764a7d4da0ed54ea52edc5dea6b89b75fbc8aba4c2a14698dc6770edd6dda9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7741e1bf08abccf9145b6e09196bc4ea928cff7c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:668f76ed2354b73f03c19ad6d0943a8b294e8d99a66a91b8d6bd3d98533705df
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..63b4e32815785030458317ae25dbd4298c8d1299
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a127191084a4366fae265c71b5dfd760f57082317ad7c00349ebc24d77e3b37
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6961de49db832d83af5624c619593110d24c1ebe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30c6cc651393cbbf1458af8d490af0387a69e1f0737b13cc803d4379e9ee008e
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2854dda22b35fe1f4de47d0e4a085ca4df6e9fd0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7f770bdb60cc48e220d95550387c70d372be364bdb173f6611fab0c7b95dce2
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ffdca8436e6a09d1f584d304f995d535fe1c187
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8c930713010aa6c59348043df898f865e099af8b5dc6a43fa82a2c551c1d6f6
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3435f29a58f994815ac709a6f8c3862a886b613c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9719a760dc8420de503b21e76b8478240276aeed9d39d86b466b9bc7dfa934f7
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d2f703707d8720c3956230a4928ac40dcf7892b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44c64e4b7a005d24a435d9e948a801ad8ba95cd91f0196dfb37426d18f3f6e2f
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..610bbd8b46c13df63b4bfa098725a98b30500f06
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6c172503b369cec7191905e7aec30c0f31bbdeab319783dc524f5df3a03ed04
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ecac4aa7ea7cfcf1ca000873a87694028a954778
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d55b75ae1cab6222439e2ac96c0070c95fa4ea79b096445af1ec86df4abcc054
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..104ef95658d2872190e387456b968dd31247b169
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41288bb24300c4fdc93cd06ce33e0dc9592a4acdd1184f491c14024f68590228
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d938a84d2f94c9035d01a91a97dab033a826aad4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ec3ecfa381b562cc14545fd8303d227ca6ff670b5d49d1aef1f320c5d9d884b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.17.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6db9856073455b51d1cf616f25a411371ae28973
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c08cc9900d4770bec56b7756291036286bb62c6e29437f2082eb88db0788d9d9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.18.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e85f25e6b88844951bceeea39999f77ace65b298
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cadcccb493432e3d17eb4f4203e860173e97f4dd5b5e3670afa8c776c033593
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.18.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.18.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9fbe5a93c5833eabc7f1c0aa5bd92d258d3b93a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3904e08842dd5a1c81c9d3831a95baec8a47abb786472ba7eb49803964f74426
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.18.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.18.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c47c44fede5ca2fe3ab0b5f3d1cffe7f52d4d451
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b7f1300629e6f99519ac67cd787bb6a9ba7ff0dee13a53df8304e7a5168eb81
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d2255b94fcb0beb9c3e7e49b3bc5a84ee6eff48
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b78bd0352f75d32105e910a378461c987c3bf801cf9712b2b2d19d4cff1451c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2709a120b9776611fa7afa7f7ddc3f8fbfa8fd9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4223ee658afa5a1d87957c6652ed499e10ba2f271d4bd2fb3c77facffd69e4ad
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed3f88194e7017ac00d310770f65f85d96e45750
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa6774df238affb0a55cca3fc9bb43dc817e898a75618747fb3ced7dd6125af2
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22a08acd33167d8db6cdf16692e80b80fd175a3a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b6c3ae7281b752055f99cde2ea021d8a78addb1c5587070186d9b4ce78d721a
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39e8472ee69143ddefba1375fbc84cbc263d17ec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3492c0bbd1f5df886450e6f848a01f45ae7047a53cf3b3c0f54d0f5eb3fa62fe
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d8fbc055f3b18daf8f29d5389341e487c957113
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4098dce2ee66465d106579b5be4c11c941ce074646b2810a7e651263e8247697
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ca9f8b82b37fe446f435d0bfa608d4d716912d2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d11f5e3229c3103aeb5fd063da2dc8dd8cb955be9ce19d1ba194bde53257d023
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9bea04b6e8fa6f9ac765c74d0d2fd80a9476503c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0aa002fd624ba52d8608047d2467ad2c4897726e7e10fb9bb72289bbcc8832c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2fe9bd0fdc9f124ac7348b2f44631220a4a2042c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:490524422d0671bf72db88267ec3d4d3a150293de27466033db91c64fecbd428
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5869694d493b2724b5a536fbacbad248ea63024
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3beb8aa1622f6f69a0e7fa8fcc70398aa8a7df64888578dab9a0f4335982c716
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f7298da6b6f9147830b5a0be131c2652a16603f7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcf421575ce41d693530c737f9a86130e7cb9126a55287ea5acd6a1a7ad3fa25
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..80c889e12c85022af178b968cd1cf8bf1953f84b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:948cde3da5e8b355de057f61f9a402a16e804a577df9607724df202bcf4aedfd
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..00a51f2c3c71cc1246e171d99a9338611c266826
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8222de2c5e6a6e38258e4cc4bded1b60d12521b72d29e7e524a5183460c68053
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..07259e0bed3578b048a7d3dc87e7898035db7196
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0c81d99587aa2a326d6f5a3f9f571906aae36ca3c4e211ff51b695e1c82cbe4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d6013b743abe774502499182b9424d53334d89b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ac2983de2c83b93661d6d5feec2b4a128a585deb4decdeeaca596122947828a
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22b0d6c6ed130aa60d857dc291d75cafda6219da
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c82b523451f38f386e59776a51512ba4edee76daf60c87d89b355c5b3836dd2e
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..753cd4713dd51cc12bf4df54fe8ed92401d01a9c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b29e5de5bac23a55585375f7f012611a77e2ae1d8088b90d822c3c93e283a046
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..844a223763a99b562203f9c375c16c310eaa5933
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:041219c92bcc51bd21194c9cf0a90d1ca43d294124056b3b9ff0ca701bec1ca8
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..56067b5f6d78d646a0060ba7c2122f2e13a22ed6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8fa7e3743c4cf4f80a7908c0dacad18d2cdb18bc849ad5acb361aafe6d3b88e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9fcd29ccfac636651ca69deaa125d008cfb02fc4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62c6e368222a6bd1bd8167acdc4f6a131cecfacdf749197289c4573911307f8d
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de4ec752ff7ffbc748b6d57007b9faa5226eadb3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65ecd373c712d8af386c9732c7b4fa7017173ebd9e231f3875d6af074616b164
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6416756c1c4799903b9ab432cae86591dda97e73
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10a306beea6afe419e38ec5d5b7c90bba25960994b7620ea5b8ac11bb0ce9177
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..53e09b6b9d2cd6d3c283edeeca5f329ab94c2f17
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b207ee5e8f52f2b0c4968d0262e31a67cf631ef404c1a10d7a31fa07ad44a680
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3c4e60eaa8e2ef054cc871e56f6a75a61122e07
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:263b5bf18de2eebac40b03c172a1af26030d0895fef82ea3644b04f822149f9b
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..63ebf39ff0237ca806ffc05dc8d3f33f3ffda72a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3264fe0c0fd9b1be84248d3b38d888e5919387217034cb3718c5d810bffae128
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ffeb37d7b26a39fe1dbd7a003f615c5b1a0ed73c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16c79ce9dffa00279681a8615965046e860269e3f11879d1a16b2ef95a6e1e61
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9708e9167ab7c258c5b3c1eb753c0695839b0282
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e6266e02a3c0def122a98b3110ba94ba6fa590d3f9e1c432ac0de6427e4565f
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..265cfa3b0a9e094f700ba15ccfa24c6011d9e3d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c826b002ce25c5faff96869565204019ef044c6254f588089fd0281f82b538be
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..79f012e616f3fda1ff7d0f1478f654b3b0dd4e2a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02ee79cbc13c6736c708eb6b8c502c455ed32fc7b9fb620e4e7355624f5ac33a
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d561661ae01e1844118d66d4fd260fb6d682137
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79ccd376d97e7bc79a881883c2a5ade63ed06bfdfd3dad08a7955acdc007daa5
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58949eb6a2fdbfdd3ad8770676455f8fe6982131
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ffd68da862b4a376ab278c8e34286791638e2d660b2d1ed470af42c6fccbd28
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..608079523cadf9153e483946f7ef16c3c6b024b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05d1d36197e59633293d3f005fde3caaa3d7564994e3ef024b647de87ce0ceb0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b9fb30e152f8f8e1c6c68a7a3dc16759395f780c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be5a6090e5fd812418431ecff6d0873b3abde189863cb0f44ac31d0eb06b0afa
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1668be7b8ca1a7a317551a3a7c1121a3d4fb7972
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34a24665971e19b928b90feeb872138bd4c33d4bd70b130551198c449f8097ab
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce5e7a9fd8df150f1a1831732840c935d320f6f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19bbf8128284863a739265e5aa802588be44c4bfb85a3ebeca531805913551dd
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..35a75060746fa96e0634384148ab427724d13519
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82a3cc3b06c4382a4360272f243cd6d9b1307ef56285b1a1d5731b63ccbcf32d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0614a04d58d7bb2f61419012aa03794fa308e295
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5d6f22e73a2604f997fba2c14de10564b8fec84bbac4e4334f924cc931d4762
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fd1bfe1b7b29cd04bdd9426259a9b6eeab91463f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:187f103cc78a7608ff0469aaf58cced00b9674c00cf098e8c62b613e0d72fb02
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..12466b04e47cfd5d5e2a44671aa6f6b80b263a51
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e4886f6d862c6dc4da0b62056df377637a15c3e8e633dae9c8090d240a446ab
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e64c7ad0bb16715aaddab32875a9dd3dda24e46c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74d79d101ede4ecbff2d8eaf40cb179916bba8cbc90f6d880ca3ccda79ba1b33
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f018ce7d7aac1ba97d52706382193f2e3934483
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbe294311057da58b9a7737f1fa9997ce019e85dc0eb26c020d7f28556b81eed
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64dd6cd7b7981d22d467f4bc0f1aaa58f10888af
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecbe393d2bff1acba4365d1949791bc2d5ed432616c0af1ca24001dfbd14ffa4
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd34cf51bd5f8f9ef2f8647c3bccdfe8940ff70d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d682d5f1f277d4a7c2225a9dca73e7ced151be0b755ad5581eb35372d4645c1
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..17639ed0749c0d452308e321def22ea3a7a35f23
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e310e0f4cc12efaa2bb081be413557cd0fbc20a613a2851e655d80d489068cf
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4699d99601acaf7207ffd228893bbfc8809029ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8c5fa8364cb98c46028d723e5b95f2f7b467dc5a43797161e2c4b93bc2c795b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b3f2c4e400a96c7df6489fba8b0ec4c9609dc9c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e69bb4117cfba77bcef7686e156bdaf3d9b6c963301c2a1dddc193b2d1804a8b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..35426e043c1edd6fc624e394dd51aa1401714953
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cac8536fd8fa2a3c0c66d4e74ba3e932ace70cd806d63705d6fe5afdb0cfbcd6
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c57b47a7e3ade5a499636fab1ac4a75a237a10b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f5208954c1444140a97b418ed38b1ef202678ba971f5ef9bcf4edd11e1a50fa
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..169a8527cd80ea0095e63f8042a109fbee142647
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad829c174971e72b0ca48b1656f6a3a7d9c3e2c5a57345a4deb4bafb14039c17
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3161f1f02653d05a0bdba756025d62b9dd5f1673
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29756bb953be86ed988fb4b9a1de39558482f9548836d1b1d85a63f1fc83ca5a
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8cd4d1d622a8f8383733b0aa20780952b8ade3a1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27d8ecf821223d0ab96d7f1fb1630f493d2edd4344d61f66a05b0d8873e55b45
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c7cb765be763fcd2849ec10c5509dcdee67459a6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed6f055d2b7acc2c58d6c0d02203372f012f9dad2aafefed405669d20b4b2bfa
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..efff6761758eacaca87e471c08c520e1f254b569
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cefd03155436d818e42b089f8235bcf0d86d04d3d32734444d76c086ce7c5af
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..69a17b7b4850b9fb4b5592b9c24a472439ccdfd6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e0fdd0ca2f8acb510fd405b6275410f03799b6ca4f07f4064533a4dc5646e38
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc87d5eb89773e6ab991266a6310d4b3c8c521b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e6fa82e33e0d361f7d812e91b5513aaca1492e8cefb024de5399293051606fc
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d3d20ef0856c1be9f1df11add2c3d63ba547f40
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b164343548b47bbf7b7da30071cbe34943b4bdbcbb177b777d1d605f53019382
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.18.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..280d2fb7bfdfa3550d8e93acbd415eb7b769d845
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbfb0cea12fac2845a314329f81f6fdd4d2eaba9ddaba90383a13ffa6c5e72dd
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.19.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39a2f1f3edae48ddf97720033ae8c20011a645d1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b14c9ef9ac8fe1db0583e90911fed0a3a1ef0e62880933ec9762d2b338770650
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.19.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.19.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3e9662bae7a9cb5109bf84dfeb06fcadc2c836a5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed6b35078e2a6caeefb149677b0acb589c675c3561a7c0e0476cb5fceb8fd1c0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.19.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.19.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d9b38fea064e3290e68fd4b50f1bd2ec6b59de2c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5f374278947838f57322ff9aae292b589769183a3c7c38281e351b902cacd8f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d8206f19c0a6550dc558b9fbc8d96b3cc4aaee70
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38a28b84b610dbaeb2a02f3b71149108a8d92c30e0af73b65d39ea4e7bbf3d2a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f2d4633ffbad59c727c4f49230e622c8cd68d50d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0500e7b13e08e6833143cd13bed787d57be744f5bfd8196735a18a278fc2284
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..79286555700b56edafe3e3fac9155f84171e98e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:effd4662a4a8273fea1464c5be8b438fa61c4b68b828ed0054048f90a0fc6717
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e3bcfe98ece651b2bd3fed6e7816e313f994594b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3d72915b705326739d9231271a29f7bc759536c63e52ed2c1f0cdcbb738676e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c5fd09624d58ab43bac8fbd94769e7a04f469e8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae82282364901c7b23f845d6ed49c1cce20b79117782cc5b78fed0c5d39a364a
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f81f76fdeeee9db837e9443bcef175c7756ab5a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b493d598a7d88f334cda21a4e189ad31b165678ea4d72c7f28d4931d9ba4c2b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ae6928b7f15515de8aef1e6582e0dca259edb80
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f5b6c80ef8e3e4b8c119ed8a26490b6a12e2a3e57627506cf0bcfe427d2f49d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e66bc512f9e4ee791f892e9e84b6e77aeee6c67
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6545d0914e00e6d402d8f95d946ce8f2048cee71bf7f934a2e52e04e933ba3c1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a8ee1bb67aeff539750517b1636aa26eeda2d209
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e03ed7da30ea86a0b022676e273e232a08931d7495645f9c110458c5498dd3db
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..38d2f05efc9b44e6ede1abb42096cb982b3af302
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1247353d3f72c3526adeb0c2905e18f6cdcfeb49fc8d9e8e353b213bb54e57c2
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..df55b673bd246f479ff03de364ae41aae6b28826
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:906710c81d83da7c8a9db3e34f0e93bee2c292ddd8b63b2d11667f8a93e862aa
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..847494530d5993410465aaf7a78cb2d1a922476f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:849068614f279482fca680df95659cbada349ee5e2fa6c87520ef9fa516be729
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..718696b77a278a2de4f632ec4e8c717173dd5a28
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:509ed709d31b752abd7a67f131eac25c536e556c79a9a5a9867614fc85d7aeb9
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f71192621c30d9c4353119d3faf2c3e75d5acebc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0606b71f4fbcf226d58d10ae234fb875946294a863208eea42f7753d859f93c
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..57c812cb71c8f9cb51ed336e64c2dde898552b3a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f155886544465d376a5bb40a4b1ed654a2f018ebbe1e6cea9a1f8ab46f2aa258
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f9aa27ca27706ef89fdbb900a8d00ef2eb7c62d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d343cf810d1cbfbdcc0ac281afcb65dfc372ee5b1656b9a4bb096c2aaac76c9
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0bebe3dbbe55445b64bfc2c5e3a3802b78528c5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09ebe87271b73b514decb7e41aaade5a02863e15f960a227d679c0079480f627
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dafa6330cb4fa9331260f3d6296d8f9b4cdc0cfc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:753e73bc07856fd48d1fa718ac4c71b9a8aaf991366e1349183419e0d37705e6
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45eeae9e2008f331d9d338e40888e28760320fd4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d26514a16b53d7c4726d26e4d4a7b4bc5ae8b39da77cdf54aff31f02ae089e42
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fbc1b3394dc2d1232b58128790cbfe4900c36c5f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bfe4f42e52f048a5005de538fcc2e380b3b9259eb0e00a7b5171b92932053ff
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2b9db83741554283e23c1d7928ee0587d5de356d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d6a9826b4525ba36b46b2209dbd9f576a6bc1e5283b02d374caf0ac3e333f66
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a77f10bf0010e9a0109219cf97c399dcc841b0d2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4db8b26804ba67cef4ed0d03da0e3f10cb4cee6e6c3a27498a9a2ab830787301
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c834ad8856fd3f7bcc7978087ae3a2ab5a5aac79
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5929b7f81e3fe9e2f001b7508367fbdf9f1ba74fffe2f47298dc0a4b8b4d6cf1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..77031855bd794c3c25db3c90eec4cc56a5a502d6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a0ef47cf4d38b994985ffc0f35e38436561bc3333f01c1052f221e057eca116
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de63d8dac4b9ad42a066b0c648b55515b897f216
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2756f66d02e918fd6b6467ff705ef5c7f846319f9d23d624a18d76c90af159a4
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6606efb780046c4c41f32331b7b4017006eca27b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9a7d3c843cbecd9939a18bbe821d201cdc2b3d9a0dd572cbf099bdd83b5f640
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..716d41768af1f7d8a87c1311027335f1a37a3ea8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7c42055e60fc767163505718212cd68323c0d005893789225c5ee0736fddaee
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec69f0c1ee81aebe4803fece3a56cfc46cafeebc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cda42e79ea5a02fb82fc83e547de416f5dc71e3656d99b837ea23c2b56923be9
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b16ba2668e83b3c0f4d75b1022be1cb3bdf05ed1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbfbd776dfb16985012a23dacd22a333aec9b3d5d6e326152d69619202d2a8cb
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..958f019beea912b1ae9cc1e2c3e9f768a60b1110
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f8a234148a362ac304566260d849a5076ce64d8876c699cdcbf1833722dcc3c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b3ba255bb41aa985142daab7244e2feda595608
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e7593a852286ac05a9fe92c3e65368e075c811f05808490a87221d533bf451d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e10fef329ca70561fd7578f6d9ba119f667c73b7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78b6f9482bda69540645c203f38250518da75bcdd5dc6f8a9dd9214a3aa6f61f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5d77b246eb094c7b9a21fc84d30ed95290faf69
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fd898ec34d696f2b94991d8d976ad564786bcb4ece20e1632d306de2cacef9d
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f1a033d145290daf91d5aefd509a37bfb0175ca
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:630820b9334f9149cf5ec002ba18bdf3f9676987a643f4aca0cd1dbbfb19eb70
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7ce8ab1a602306be914100a28b71c6d610363356
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6a73c2680b0ca2f2ceb117e19472868e9c17a218f943289facc7a5135e8604e
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f215e75847bede64684bfc74a7f9af2cfb025c0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99f429400dc54127a075d4ec56b10fda1bec06631d3bc2f0e437e4fb0edf5135
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..78564954808295d4e97e61cfd0587db88708f74e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eab9237faec629a69148d8920f4818c4d8b1b4f75d4b2215c888ae7c15132e33
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d661a2c112116fde29e5963fd45ba1e60fe16b48
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0b94d8f705fc35127ee2a141364f9fc7590f66a763f1aa96ed27c00299ce6bc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..23c9e602351d7b31b72555d3198b0c74e1c5dfb7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0597806f549892331a8c4633b34dee1d5da64a2feec9cc06056caf7105febb4
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..385b8689d67955b5c60e4634c6b691c336451030
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:278841f2b2bedecc70bcfcc7ff8311df40978dbe7635d027b39956cf104ff336
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3e4bd3544e9ef215245f0e870ef14dcc592b519
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38039a125adcc7626f9a79c09b0d60025b37ac2aa009a58ada15a22e48a71a78
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2de85718d545dea8491818987a4a9ec781f923f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caf8a28f864a1cec28db2963d9506dbf94abc8db69d454eef984764a405fe124
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fd840a1f7b482bcf143a58f77069ddce159b26ed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0de9873b88f918b724d6369b25ea3f0edb35cc639a8c58c3a4939fd25651bec
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..29718048750e63f0a7d28fce579b94dbe47bf7df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c773fcafed2d098ebae54ce737b3774686d343a7840320c9a3ab699d27dec81
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..080f3e9271e16b0a5b0a192df80f5606e45fd011
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47effa4b5a8a205d9c1c41ba38590e3d155d6085ee2ae412991ab240a2244407
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..06d7d488a2a8b29a712611df40cd1a1d3d66aceb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5499b5981cd9355237a14b8e8e64f93f436b1d19113ccf5681cdb7b1d7c9752
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99c45e5de7eedbd0307ee32dc7b81aa0c9986cb9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0daca2314a0b68af31696fa36ce18c0475e322109f7d043d3800a8f8ed64fc66
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e0305a2153d35d6e0414e86e2b96ae42edb3a8d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a458409f4badc20edd8a766864e13779bf99b4f282825f14f929fcd5a203d11
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3306305b7d586f3b5f7db011c313b6a766a16b82
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1dd6d13cffe37f4511364927ae4df105bcf87d4ee3986d43c806e1c0496aebb5
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..80fa23232cf6fa101c6ce900543e9ac4c6764878
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8438f3311cf0bdf854ccb172039f050f1763524def6d4262847400340b8892c2
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..390cf2b58162718b49ebb8518e539c721a41dcd8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18b05456dd9511e6753370351bce2cbd2c1fbbb623e38986b82e90a8c03cca89
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6524a576b2de24b640bda1b73753b5f19034616b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9425654050bcc26d34c5bf2e4f560042f4c330355eea244ca6619d6b1776dd1b
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..870f1995207d2d388639478299803a2dbf68368d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17c345e429a33bfae423fe44367d12ef519510a0b5802506100e6066821fd90e
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b20744b95d46cf813b126a3a0e05d25e07711c4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d170fb81b06a204a765e1190a29e8c2b79070a03ccb48ffb1d01d91c9ce99998
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ddffcb0b087d486c2a24154cc0b0e6287a0a51a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:809c8b2482fef3fe2e12a3a456d398984160c14f3de6fa7e799ed896f60a8432
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d746fecf98626b10d2b8581599a62a010d008318
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d39ed8cb425c49291b62f381b0a9851802cc1ce7dc3750a4eab021c85004759b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.19.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b2c4eadac26785dec66186416e7f6262767eccd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:745d6719d5bdd48c41d79590064e426c96d263738ce35b2b545bc3c7c4f85beb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.2.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bab8c9fef073558e58ce1a19624b4ac60747139f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adcd2a840aeb3a33b15c23feb255e2c514f150c8d87d15d6e5a539577f6975ad
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.2.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.2.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..575e86f52e41018f567065d3acff8b779ef5f817
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3df193fd4e407ce99374ea888605bea13890b0d310eb210c8a6c804ed51cf29
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.2.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.2.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a80f28390f36a110d0bcfd9e6e1952e0de9bafc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61fcd58a4b06da2f168e3c81f0873b278a32e5f44877c98953a060a741f5c987
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..be2ca9f3aed8079d5259d829c4561551ed2dfc11
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:196426d2cc7c96f63e551ed3d470116048d28b27a2f170983baef2c4e7ccf79d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..07a4a41fe0f913220a9a6224c21fb11635dec01b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c2a0029204c4a7f862557ea0dce142b67f18fc82758bf68fd95faeee5829f69
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a89f4b185a359402c38c9135f1247f46f9012cec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3349fdc5bb505af388ea14821eb1ea4e1a6591bc5df70b473f0fadb4dd2788d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bc3a7450e19fa8b01bb14c95f5f2478a37e7dd5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e02ccf20db8b2dfae7948c76ced6e9d9a2f08deb7933b19eb20ae1295e968cb
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1191ceea116f3c71451d71bc6ebe2052976a4213
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e315c603ddd163fa1b6b8243377f5b77b2720194d6a3cbdcb7c1c97bf882c9c9
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..969186ff8adca140666aeaf0b20e4f8033ccabf6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da2d5898e4dd0d38385a78daa319d737af754ec029582b06533d2a34d03c6d9f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..65b5af0258f1d8208971fcba69106fc67ac5bbf8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:222eda6e6381af27488445d4d670e71e1ff59620eef85e8e0f2201f91afe750a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6379f1bab2a1e5f50acd00d88de3b13d1ecc3fe5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23fd7e1f2746c798f08a2b94fb838b77836fef89230815cb19dcf2f4f14e8c6e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..56d281ff6a917fb4f8d20e01e900e5288f970141
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b139fe5baf11fc5eedc5ab177913b67818b8f01d95738787967b25fbc5b5bb94
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b3af551f4d2c8b5d9691be3e2fb0d938218a0d6a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05f1c2de207571a62f3255ed84cdedd610c60fc6ba49187b92e08bf6b0772cc0
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..924c5831041a5f704a00aa32524e9aa4468c4450
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa124f0447c66fd8aa908c1db5dbf1b2632a41100b20dfdd94d97a6e4fa83204
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d79efae8e2ea8d7d66e3a6464be77e098b3c2c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eba4fc895906071292318a2f44c3b45f44ad9f45d46f3b0b0977008b9a58311d
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..982c82c05ada38a8738645eda1289248551d268f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b4753612dfcb8fb3674a1263eb8f4d1d4f1ccbe3fa3e5b8ac57234827893876
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..47cbe0605bb8b51669fc6eadad4f90994c33e2b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:132d11f7b0139194fd4ac7bd649fbddcf866f7335e4d0e2c06bc93193260241f
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7c574ecd9ad885ee1a281c8457872e6919d16451
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14f60e69051552b19bb506471793418fe139efa2c400f2f344e81a970f2318a7
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d833ba7c028170c5f8096ddfd3c1f09e6c59123
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c84d92550a634ef3d863b3116b8c9700c31ba2c8ec5ad5e90e4a221e2d01fd1
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..96e5973eea8325a1de679563a078ed5c04b8a6a5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45dcf27af435854cca8267bd1a7c2de64f13eb5085c0a7f37267a9970640eb43
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb75c0aeb1470ddb6ce7cadd17bf4d9638ff5a5d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f28cd8d961b14e42723e2f5a42beff398c44329edeb60a148054dfedc0815ac8
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ae8dd913b51fda6e8e9e71c0f986be28961d8e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7f18b2b96d6fcbea512ca6c7115efc329bd1e304357d86c8cecec2adfdb0906
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..18b41a489379d78ee458f5f5ecf325ae19e24eb5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:266c433fce5c8a9503043720b52e012986ee01e676cee5a49d51601fdd0757ba
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dab2dfb1b5523a3716f02786fb23eeb4d74290c6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be97ab1e07c6d38f21d4906411f42cf3f95e82461be38616505e9aead0a4c1bd
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e3918fcccdc48dd703a2b22829e48f45626881f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65e30353bdb3dce0a8fe5f3c2037162863a71b097e8ce7706262eb347daadd30
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e09859b82add0c5d9b30849fac7fef14b57dc7f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cde0a586213ba34492455a0677bec2fe297d91f51de64bffc2580f2ad80b373d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4aff0664d241d459a184005c136f0ec1bc51e495
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fde8e7bcbbcc57e697a6336466aca2b4ebcead407d8106b904ee808941aef69
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c20b26e08932d5e06cea2e6a72d68f0521b160a7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fed21937b98f40b54331b4f1b56aab0fd7336519db382ddb5dfa91b9db48812
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff6216318cc948f5809f9526e866ae12cb992a45
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4437863682e7d0c934f1dccf091a81427ae378404e59dea8421c8468a0ed46db
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..12d69134c634a88e5dec298e6748d86061934f03
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3c8b136218f3a6487684a7291265c77e772f3a095000e807d105b1752b31f5d
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d4afced82ac815de574e6f40a9d0f37a013f15e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f2b9d59c8cbb72368c1c0a4f3b33524cf78c85222decf968ed654aeecbce845
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..42dc95157ffc2fecf83b6930bbf6842b0dc4b344
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:074da381663a2922f23ea5c80633cc37db16827a37cfc87d5619421ee789eab9
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ac5bec983fee3bfc9b18a885699721e2a683111b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c9b70e612b721583cbc5d8c16219a2b04572ddb85e3a1811a28e3f0b6dd0f4f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ae925e11194fd6d4cc1fd9a6f4f79d73c259d40
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7e83529f4276dcdd76f9aebafc1b455313ac368602c763ef8792ecccce39c5c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4bcc8464482c2f41f7d9970a747a48f4d67a60e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71e4584d081fd1c2ab9e88b6738a2f051e8b7a9f7ef63087b21ca173bc1640c0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e4fc787bbbaf44eb2aa57ce3dfdede436ffd1591
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f95fab15c56b7e79d255c578c64b1401b2b4ef93d906ed7d24a6a58d1fb655f
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fd7d1b04a25a8f48e36fa61acefe537eca52bce5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8999e0edc60f87be70cb6b6edc93829c0247fb664f6b13a689c4a217a04391b
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..80d8da2cdbed7eb57cf1a2f1aad8b5a3bd9ec385
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0c83ad9076c1a080b00a80bb17e352b1932d8b320e80b60e617c3259b089ee3
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..69cf1c11e6fd5bda65d8a6424d98c9fab6f46c55
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbcc6ccd1a6f82f0fc20f83ac0e389ba7cf945f869737b1f1dc9cb560465eb96
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf2454a93dbcf4e9873240429727d6a50e077051
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85ba96a63283e0d02c2021082597d2001869ca083f1a24b82c45936f9a5cdcae
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0dbbb1365298e273b9a80c394ef93ce4c054eeb0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:985aeebe3798f2db961cd0b8ff599bfa34eaf086fd0486dd36cd9422e08ff466
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9fba1ca5d3649ca34c8bb8b9720e1990767630f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10852ae76b4567704a82c612947b908af79f00ebfeb4e32ee8533dd955a1b404
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5e8bbc652f10e0411cce1e620b86802e3bf8bde4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb195392a134ff2ac1acbfa908ec9ac28f0ef5b9c5ab6b6454c159b9c4bdc83c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e406478d85c2a5b962b072173dc5811c52ae5a8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d30ed0a5290c0c5437e5292183d5e08ab07dd3a4c30eaf32fb4b3a41df29812
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f3fd58e2702cfab83a1d88b669a8efe1872c96d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e05d031c2a0cc0468d32126f26526109e5aa82ce6411e3131a9ee7288b7d2899
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9698ce9e012ba013d3162d11f2ea438843decafc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:877b7e34a42b866c3aec187b7f5a49fcf47af975b32bdaf3e5ec27a5972be66b
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5619dbe6eb56ccbf5db7483d02b9e03d36f2ed8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abb92f21871196618a8ee16ec4ed1f6686c77c302047087b1816942b0d5b0556
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6bb5e8235d7e6248a9c8e709ef841709a778be15
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6636a3b4665e41ee5b7c95784a582232d4fab56306de4d7495d48b520ba258e8
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..143f883e560f211f4260ec152ef49fdaf8cb48b5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3df80ef081ad60e75eb250ead49834c86d194e9f2eb0425ddc8e463faabd8e8b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..33b9085789ca4509492f15380d238dfbdff82048
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c7dd4699b9aaa6ee4cf9d1811adf2de97d7762bdbff334df90afcaba1eab4a0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c4857e04403642ea3020d4fff45d414599cfa5f3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec7c2b80bf09a0ed3c4f493e811b82b61c6b331e98703169688b21c14be241d3
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4642c0f3ee8b5956ea36766782602c5c89518f79
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21704e6288a5878fc6b3d4840f4a0d87059cba3e96f903a60272d913703ee5f2
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f053e138949026384d32c277d9bcf41ca5232285
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e141260dd6a5c9aa8cd9adb40aeea5e83aebbce139e330dcaef7e8e21355e00
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44f3ce1dc53d0dcde503f79acb5b3b79c116ee77
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b80a205dd3dd5fbb7d5477fc2a8c4a8e5a4e462741fc51c237508dceef8b817
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16ccc92f7771f394969e4b4b794bbd0d81bf8982
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd5893ee01e139ad79a9a5d25c7d111ea44dc9e77a045141366cfe89c85a0ad1
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..af8fc894a0c654bb633d10e696c437c74619fb88
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:436c8d620f57914fed103bf2cbb1d648ec89bff450096690d64f20b0e243d643
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a29e27c1856491efb95c5bd1d28430e2b160a3f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fc4c56c60dea2a2d2c604a8d8e2e1bf08597e878ba480fdb3c054a1025b039e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ac2e229ede88e5eb02aaac75910c9664fee32bbd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:893707dc659d0f535648b4b27aa318d2bf9542cf9a5f8584b323856c6036097c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f6ca89de1324ee59058c42d37035f12f0acf260
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65ebe1aa03f19e38bd4cfcabb684844f789b8b5ac60046e161c1afb9a03ef7e4
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.2.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92a775dead7b9dee7554c5675b2eddd97b8a9acc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcdbc26c26deba9d8b45c1847b06ad7a9d9a20864dd8827978591438856ff9ba
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.20.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..683485d0e3febb2dc9dc46ca6278670658401c83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c9974e86c5619fa22e12acd6f5d9ebd6a36070f6fb7e6b066bbb3a102face38
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.20.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.20.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2eb04ad2a2a2c0c2d1c01a2551706497b903f8c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1a72a1018eba2407f88f2296430a7eba563d5be91b9ea3a9d0f5b87e5460a02
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.20.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.20.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..87d29ce183a987d42f45cdb12d6cae3ffdd9aacc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d524d31cbe7953ccc7d76f78fdc6a18b2f226cebcf8e1d0f8651292e50c7682
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3432eb4067a29591ac040aa87caa7d92d12f9045
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:447a9d138b1521bc1781b1de7acf3fe2c5b6ae9d1f21ca06b7974f55b9cf819e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2af8056475641dbe7c65a16d52bbfa6c6b5a7075
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a6a2bb29f082e0b0ae8ed18469c261e8ce4396ae9f29f14325cca75f4d887fd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..48a6ca64fc98ff3c6b83f9d198d64d0ed762ac3a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfb4f8c869c3f2729c61deace82b159f1f1a5bd4f15a03858a658663a6b66818
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f90ad9d1e77ee1353e38175749d4a0e3a6cb0c7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:479aab7383c795ffb1f0b3d14477d9673062d5f78d031183be37e7d87df101fb
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..32ed025226ba6cf62a561006e92c21694b6983ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00a69bca3d6b38033c2ae3059430a59672fc8484c2f358986309104c2a684f2c
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..531c4c8f6d380c9edae98190893835bf9914e5e3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:881a7115b85abb5060acc1001873fe83bb91e1b7558d3235db658cad7c4b312b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39b0edd5c9600fc00299d3842a5137c1961496fb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:956240d944619e082ebfa432ee2d660d10b1545e0c9fdcae3829e190e02c06aa
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c387ee9a60451d2ef1143629b3fb9d41f355488f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a482df477b92853f45463cc48454cf8cde2389de618dac782774f36220ea8fc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e7b2633ac51d383d98e33e940952b8311b67d1d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67fd1a1a6a3bcd40b58cf7282d11da0ff6dcab441f3b9c652658c8c1ee9b6558
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c209a85ea5ab6e2c616b331215f86358eeefc06c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dabce9165ddbd4e0f5d40e83306e1a084ce19a893a299bf398343823fa353554
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6460f12e1e03b5833d9992ca765fb247ff7ed7ec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:061aaa6e349db4fa0cbcdd364b4810b5e35d7ce6a17de45c31ac4e99fc49f7cd
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d23a3ebd0d3b2bd71cd3284c9d12ca868be934ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b0fa5a27c000a3bccf363727991c484a691cac979f6dbb1533acf4e4175b554
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bd38c940038c43d52a18a1813d5f74062f72105
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f06837d18f6b1c1e08195fea2052a7da5556507eb0c0640dbf7588c729b1ab98
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dd5afa6e685fc9b11e075db89e421829116c7e7f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af798895be4f3f9b8465240ca03930360b8c271b6a89880d887e55f071554b34
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..66e6fc4973db9d16c4422ffddceb5351baa185ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:770703b8beb262c5d66a91a6f4e15904cdbcf4fcf27c1dc2598cf40f7196ea8a
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb452291970aa048e2c8075efc99943b31a9b5c4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f8c281f760818227bda93f79e6812cad049905c5418b7e0771699602af9bdfa
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c89589c79d80270beb711cb31d12d2884229e320
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afa7926dd1cd027c6a5f71e4de794c8abefb9cc591a1093fba0f6dd9f4b635ff
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f3e6352151262e0af6cbb649832fd30e81a6079a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe2052f62d9e8b3264a2b30403d375c4d90402302b5ac88d480a92bc8ebb934d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c125abe4dfcc70886ed14f47e17993a1cb61639c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f3ac0b1ede807f22da317b77143204b85bd744182d51e2263bf0423e87922b6
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..330f1e2fb4f85b7f5f7cf1c6f96facd8ff062eb5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:187730f5a49c7dcf8937f7d798003b48545003509982e5a5eece6854468e826c
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..079ee90edf7c65c2ee80a87b65eec87a7153b310
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d709f87724a18f91ca6b967031fa3ff2e92f370b5840dbb4892b69cd00c0cdf6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a224b77d2656d8a43fcf82db6b109c593dbb903
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f225d2221423f76470b05bc4e910420614e63b8aa12a24499b92832d056961eb
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15402b5ccd6a8922a08110fb8e07a285b2d82889
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc05422b3b7848d31415e7cc39f845a6fcffddacd157914232c5ac7382c69683
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cfa308a99480587b10e67931c849aabc689952e6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cdf8d4e85bd2da9571632bcfc70086af276dedac83dbab89b2907ccd81b5d9c
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9f049ddf31424481ce0709b457a503f29d28d6a8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c4e7fd0933057921e9e8f733925aa15456cb8a81e4ce6b7b1882594f901165b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..95126d4c8bee43ab3c9378c7c888870f36a90d30
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b0d75f94cf3f9495b819d7ad10145c82671507e56c0314ba39bac5d4ebd922c
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..483070d1da78f96b4652a0b3843a27817b04f659
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9656dc96786e69da4d0a073995c27eb4f69e39831973d8efd1c3f783d8cf70c7
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39fb9af95686e91ea05ca0e601fe11ad79baf619
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44d8f53f57f0932ce86a3635194ca8060921db3f85bcaec47f8cedab7ed48ddd
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..be964aa5ca228e686ad30c6bc02f45747cb1e47d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a251b97736f3e537a499f9d51b16b1e963c9d3ef8d118658afdb97888fcf58b6
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a4b970c3ee2fb321053f303255e76c4466ae2b63
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ede9c02e35e679f36c0751e99619a1c3617c204c0671e0c0073c08c45761145
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a82051cb57383ccbb3b8aebfff5e28cf3e4c1b05
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03ad9dcaefb0cddba760369682b15c8fc1d85c4ba897b1fd4b3efd6676432279
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..61920fac3f1018f768b16b4692cb648584584e69
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:595128608a909eb16939c1b8debdc53143366b39c14a7d37110296fc9c005691
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02c7e49c1663b4539f0b1cff70d85f41db9a2944
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88f7eab3cf87cd87dcb090746605b03bbaa3e1d8e1268eb20d01cc007a2fa4ae
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c18697993415827808201d23dea135768a2032f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7831c0783c8d1e6dd31d8b699ba128f67cda9e9e1ec422adf84f5cd004d7934
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..321ca078e8203e40ca808c25f015a2b410ce67cf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04905a634a6d2eecf3747c51c95603954a7fc94c24951ade774162be8c73a4cb
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..66bd1f3ab316f9ccea31634d977ccd7ec4691de0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51fa2edf592c7a9b7005ffb5f6d174f9d00bd1a2465fff97f85ed5b50e59b230
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..298d320f6276fda6665cad174bd4726de9d7272a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1e3bf401cf33666b5632e97f3c8849cec8b3437c8c9f902eebc8c2a03c38ee7
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99c123d4624d663da269a72bf83977f89394d9d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3be7bfa930ce5548b8e4e210909e25a2add39393b578db4cdbbab7051abdf349
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2ce3610882de0b4d5a7a8f0cd1561154c5c0f6c6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caea2179cc9465d75169078261eca9b5b385bb11faf014629cff3231808c098c
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44550bf84fca2b5a211b5e28d04f0dcf01cc3904
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04d4c42664e25f258c332ba8a9a135bc9671a8c006b818a9e27271ab5dd4685e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..013e2a269f463dfbe54553967ff7e72b10a07bb7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5056ccf5310734237e1b6ce0b93fe70ed7f1efb7d6c5892ddd8090e25ca7f9d4
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..846ac243bd47b2597d57a84c6891087dd7598882
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:530c09214c1bb08b574a186fb54a65cddf36c8c5b6726db0d5ec1c1cb4214563
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..33ac75e57508f1d54b90f199ac3fc601727cf7db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20feefce98e66199ba89ef00c77dab7731401120a1828ca61f844626997cf498
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f38a7ebfa481a9c0eae593985b34f704468f950
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dac9347eb76927f79923a2ab2d6c9fc502c841a4a0cb10a7df79a0c80340645
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f41994c967f978fd577ed3fad36f63419dadb66a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48c845859f4cf9a66a22c3c2526e3fb30bcb50df8fbe0a0da78c5d99ad5e2ca6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..962c0f5c87e0fb4e5a7615cd0764315961e90673
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e04e65ef2cc27770a95e57d290fb742b46efaafda12faa632935d0c491c73a8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b2e21f9743dc92907e8451ed4fcb3f078ba20bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b7f9bc1f3e03b6233fb658c7aa9a1eba42a47c8b20c0ce2df2ab81f8e88eb37
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..032a25c96f6a51e79a2f85952e5204c05c56f98f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47805711ff4d75287a3d023098a86c063dc87d5ddcea62f8e5d52d71d59a1dae
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb3b43fccd6a46e4c372af7384979d27cade4adc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55371a0779c926249e4b7890c86dc98e90c8beff22a0328e0afc10f4560f294a
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..604e61a24d98dbaf86b2735d346997161d903757
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1feaa7e491231291e31939af000db91f802886e82740267e23e4e8c9c879b11a
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba46790598a0eb60c73ac31a609cb36becb5d315
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7ff0f52704080d031c24e640711195bb2f41b455bfdbace5e2f3a792edce1a8
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..acfea716ae509f27376d57caa0af01c813a55f81
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee9ab5b5d7d449033149fe4b87524f1244228db8fa280b2093e40f18aa786107
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50bb31c20552fb42f2d43ac9aad88aef342a32f0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4b4de5f134511d9c8d623bfcd0593cd09e41201b4d51ef3907f64cb79dbde10
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..35f6c3fe3c511583a8757c896d6a88c04f88121e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:962f08f64d9629321ae79565fa77a7d137326064a277acf5c2cd10f3f806649e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1848c27e3a032997d8cb508ec519bce799c3cdc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbc4b199434d7a466c784f2a35810ae8f5d92c764c0a650267e5225dd36e6740
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1cf33431ec87bcedf59e91117788311aa3029720
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dd57d6501064c5304f8234387c5974ee84d4ec473138f6bb004e8cd758e87df
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.20.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c5d8a0e8d7e7d42456d4e2b5f1778c0e8e14739
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64c5cf96df9b2f8c87c154c55f1e1c5358f0bff9cb4ab3d3f40a24d5564de64a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.21.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6392ba0c55329adb051d9306fe23ab2698e09a5e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4d9f02bbb629b6b223fd327c0b825f89f64949ec45998da05543221ba909e7c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.21.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.21.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a66bb0dd549192f94b5d6b5fa51b0a99b7331dd4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c2077b9cd333ace21d517d660d6fe7d5088d195297834ab493dc8f55939915f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.21.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.21.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d540caee82a8ae89a7a91e65980c57c383b9b206
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a02a60f57a9de02d6b2101f08d09b9f195047c57424d9642890ee7eca17189b2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99091877a04b53fd9fecbcee92b5adce8a834a9a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6738674459d0decf22278f8e84d417ea9e130b4430fa8c44aa460cb4f9e8ac41
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9d3b52465d7d87c446693a40aed613337742163
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01c9735fb7d7d25d7f6b99a0d913199926de7c8e092276936b9925266bfd30f1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d132f64eb2b3025fb64a917ab13b2292ff2ebc3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:942be628ba0f15f0489839687fdf398bb622484c320c298d0c1d29afa2c5cc60
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02957ca802bc5db3eaf9241e03158c630cd0994d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4bbdf9bda4011315efb0a7069e6eb3eeea8f3dec88f96dfacbca7b45c936900
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f7456892cc2197dcfde62ce9b27217e0953e52dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf4e1ce810212fb20ae3693d9293eb1aa5a47821f6a22f2efdfce05dd017cbad
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3361bdc0bbfff4ed5e125eecf9a30c7c97c3be03
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dce6cff3ef3d72502c82117671f482d7862eeca9d8fb5c31e5e8b7661b130d72
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9820c845a58aa8412ed4ed42d10d52a134b020bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f004fba8570f3172d5d14037ba0ab0450ae8ece532105ce4ebab9996af97ded
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..725fc7bc955ac8cf25d9c93fefaca7db3aa30cb7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ebd640d4a5ee8422b27ced668bff5cb4f6a0cc9adc31e42d9881e2f0022665a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20d1bd243184b719a47216f96e5c3f6b3d4e6040
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ea180ce675f1b2255d61ccc948dd7e1de4d5204be3fda989d109ddc79759beb
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a7d95caa8e1800d61acf6b949dfb6b1bb5c84d7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e166fa67e3460e6a68bf5b74abce212968f09bb7a80d11d7c40995331da67d06
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c2584dff997832c117b5c6c0909bc69813043b3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8655df37bb772bc8d2d9f4d2949bc54f0db07fa3cd4830e6e00eece1f38cd2f
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45eb8e174fda7219121ab6f09f8b21440deb0b61
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d1f0d050b6cfa7054ebea332867422bdd85417302dbce38caf3fbb53ab019ce
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f5616416b7f7eb75548eb7ee5d74a725b796ec2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffdf4395af0e36d291ce6aff24c5166ae1c32a5fd2b651a7e31619206deae8f2
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6a6c55a4ca4001c1fbaebba4ccbe8b66a4ac14ed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f080d3a07e66e7f3c6e0f0224c2bce1fa2c2d22735d2e4348c990c1c1d204b1
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3dea5538ea859f99527f43e52f0c28bc955c7f76
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66f3c85d3c7fe82b933f0f87756787d987cc603ce1c45b35205618c2f1d59007
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0df265115c859c634cf1f9a4ada2f33e6f7fecb7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40cdb1ddc8064ebec327a1bb6b77b50249bf59aa1b77fa99bc902b82c115d445
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec4c6fa1c634792e802daf239c3b7ec855edb96f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7aa595fb796a3568f74ca2a0cdc7ca2ea9b32bcf20dced29e6bad89cd9d93eae
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..988fd09b6c1fd6d1ca5129994f19f04e897d96a8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a42a8e1cc2b2d5248327d5c0610eb10b16e7e43567ff0a5ba47696cc6896c7e2
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce017a0cf094e8755aa7fc094078b6d7de128bef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d00c6eae72ce96201190bd6afb822ef546df511ed4d9b690a2ab802eb97d1f07
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3724143c5cb40995255830d1e99d695d5926d7d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c61c99c9657b01d29062517fae0b768527682e7456da75bd9b20347df69e2f8e
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f184d275e17bb835a04bfe14b0e7af6845261257
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:def15c73b25dc2014dfce25474e65ac9ebe86977cf4845dc57e04e1e5f421b28
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a79a3d06953b808b7ad23c7b10445c519a7d1d53
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13c1f6409b0701fa3baf8dcc35eb31897805d223b163e9f519c851acab34c609
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ecc6b4c014d95bc8040e04c02f8ce7f940f2eeff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2f05e5aed9c273bdcd87a1118c31a1d64e2a5bc80fefa6a96500e95689772da
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..724cbab2d2f5a27319d90ce7142ad6e786bff4f0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08c18837f2d3fa1cc8dfb7ce02a5552fd2e0ef3b6ec884a27ef258d75e0f4c22
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f3f4b05abd2d82a77f361ba556b226660f773b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61a609a4fc5511d70e92d49517428bbf0a5035c659fc6fed1ca759ec96ffd01b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1cd3ac0a9d885d7bdcad0384faa3d9bf5b893c5e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8001ff405e73f777476ea98d56b57fb142292943aa1aa90cf40f5ba336f69586
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..67bcd313846e510803904d57d12f34908bf762af
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9f9c90b8859778c93a4ff00adacf1ef6331f2d358159ffd6df89ac9ab416535
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64927ee82b8f8da4117f4b7cc0edfe7e21005ece
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb5842737772f61db40b278a241232d8ac66f8655f386df4c84b9fbac8f94bef
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64fb7d556cd3c514b0f48c0ff5b06ffaf7da328f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f8b3a96574df386e41ee041b949aaf31320d4bd8a110bc1bdfd5855928ddaaa
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6550aee1f4182b8c4280fc5f0aac43900084935d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76b54238d597abb4907a05a1d7f206ddb206191c228e373b759a2a7f0cb6bf50
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b6a6ca418759c66dce79f336687e14578061a12
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ce539a7e5f9925298233167b718894036129537a1518c755937248a7dde45de
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..571bed0166831b647c2789e2fbfb380ac2ce7803
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70cbc723b204a97ac6f575216efa72afb79a9c9010d3dca0b2a896c0b8545c95
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..42974919696466404deb6ccd5322489052c8abe4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4a59e9505089dbd5abe51e8d5d4505702541a566f848f9756b40169f0dfc6bc
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..21e6f896c5773e5b65444fc391a3b97e74d581c4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8294159ab77b269fe74e54acf19955aa5a434b3a03e6e5b7f395558ffe6d429b
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c69b057b933b9503845098de2993cfe84a8cff72
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9270dfba5e8997693fa556849a27796efcd0778eddcc22aba1b93429957fc21
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5478e845b25fe8e38aeefef1562d5edb69e7c53
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a989282af43164af8db2f5c403527d37443578743ae2495f87e6fb8c7176578b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c756de47bd50e0f76d7bf838c2cc188776e7632
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2950bf4e6cca039f31f0faedf01f62c5f55340a30450a040958095a80845d248
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed828f062d02b27309fc92f6afee146719507a6a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d80519e9ff6d8b25eadd9785a699e9d7f1d808f139b1bbb41b28276b0c99103c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f14c473f71a716ea1f970194272da20008c42da8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6991e8dbd11bd52a4e5b8a1169b9c0d07f3f2db20c0c259f6fa8ec8096c06eb9
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4411270fcf7dd768b15448881c490038fb683713
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45b4c9009310b5e121610219dbffca003197529358e200df904010bfe4243884
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9213633521405c2e150468283318cdf0c5d049c3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17ef7f1b6b6131a04e4c17fde0c02aa007cd7d130f4a80d1c5989e93e1abaf30
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf5b2c0b0371aebf104c76a3b4fe23d45ec78923
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31b1930dc00bf4cacf997db651a06b54bed4e4727da3f2d20cc47ebf24acaaec
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64cae25521b5e149e4828ce9ea8686a706d2ce2b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1a2711c8b721dee6f17c06a4094b570be97bea092f7bc91ccc831678470715d
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..13812db2e02857de52aab6123c3c1a4519bfd42f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13f1a5a96bf6f73ad3c981caa1cc451b2606f1be4896f8d0d15fd527dbf3cc5a
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d77bd3c7918eeeb32447268534f4a22b04f8db6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:379273d00cd62f49911a3132b320050fff903c0c0239017369f205f9ae5eca6b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa8070526250c8f000f4406d4177c22188cb4fa1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b73aa15e8744eedc72fb53426a91407161b5ea917814f1a9c615430d037f7415
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cebcebc88f2c4c7215ccc9b89e0546cca31dbb91
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6212aa286576f963b2715389d87afa737d4d626d1f287fdb4d1da727176a9bd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..be15d587e2fe76305bbaff395d2c5bc33bc8fcda
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:352981869a94c18d4f0eabe56aeb635802b08f26c6a33988bdc279671cfe285e
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..abf502b52fddf9cab8b484c951b9378edaa9de6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e846870415cc6aab6db30fe31b808df35c35f8e7876857451ba88ba2d374ac4
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bee277c0df0e72fc06f9bee449425b066fddbc8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f321aae110fc89832131a7294b8ac8fb8bb0e2e7e07707517af4819796fa81f
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..111b586658a6ae148b8ba48b37a17d5d7cf58c81
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8e34dd0bba2420fef59dfb1a3af39fa037c80b93ae6ef3711181abf492c80fc
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..676ae8876949952d781ae3e5bf51f524a93d7136
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdb5a5730ee147b019ba81182178844eb8f5d633ff098e92ea59560d74869838
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bde0b4769d73ff5c0c205fe42c392e635d9c646f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10549f0b5c45a9c9f0cdde2d0312587dcce2fded84ba210f678c4d4bf7aba68d
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59e0ba0ff51c6bd9b809e2a5f771972872d7ffa8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64c639ce23a5fdee75ed3425effa26c6afebe81365a9afe773d38d32ae46ed6c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..32aaed4210d8d61f39642d0766c1d366ebdbe664
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8cf9589a97c82f1003aa849641d3ed79c41f1d8381da491fc11177da6062ca5
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0261f1f997e08d45ca5b97b97f01e4d5d2a51be4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41332323b5ff2761fb4f2849376e0fd0b6c3ea6abddf845625431d2087d1eabd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.21.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5ea6bdb650c0432a18d9dc98385984de65ce566
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:962e83cd9b38f16d54afcec6499e90c9e10033fa2a4171ed4cc8685cc4518073
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.22.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..21977213480c3f64f91f0910a4536b31ccc1201e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:968b245b646557e9fb3dbf9f762a2267e57b8d1304df4f3b2c49cdfd26aefdac
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.22.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.22.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..384952fc1bc322fb9084be4f682df66c7a80a5bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:848e2a9c9c2d5a7cecf31d1b8702c14c45c33020792da4aa520673617fcdd941
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.22.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.22.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9265fcd7ed3f0595d135dfbd88b8ba80eb2fba9e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:259f0565ca099c75d92bad53ca12f3603023510b0d92dee55c9af72fb4c5bcfa
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..98c3e2b21f9f88e3f834bf5b0d5d62d1e7835b90
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee07c680c3d00f55d409a686822d2ee1c267da8acd6eb4c8ed5b26de6b5c182c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d28e5e54ded63dc7780b510d62814f235130d7f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99ad7ff5415aab3185af877a245ce3d8f50a02f4870c982c3bb1d92e50f70cdc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f7e5fde0a08524c0b7c20e82a11d1d8db1fde98a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9fa04d7e85daf84b73c29e188b1a51a73765e92d66036abfbb1f8a2b21e808e
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..baab2cd0b1d88f8dc1c343fb2cb6178e771fe90b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9be27ce616f0a50c04dd13ca8b5893e349845cff3492303164a99fa24fe96538
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..97f3673d2300a7fd55f5349cf3405dca5693d8d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c43031a4758e2cd9df366df4926f46bc5f69994ec55f1db31de51c88f2ac2c2a
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a5f9d8406eb58fa73cef817667af101ea8f8e693
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:833d4a0520989ceba6e2544f16313899fc1ab2889c12e51c150d5bab554e25cb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf15dd6d64b2c3384e19dffa37b8d90d52d67387
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83482a44562681ceb2e5730875ae8e16cae0317c1fa55c96892085baf4e910bf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16ddfe0065145a26310fff5dbd214b7a1fb4ec96
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:381a6a388d2b83f57db1cee12aa31afdbb6f158c4459a14a42ec0d6b1467933b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..999770f72de7018d30e9e411664b780be5b9e550
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:210bd1ae96a3dc98930730d3d76e83aaf76c07a85eb8e380f932647b338b7f63
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cc70efc314ec2b0fbb5459839d0f69c8455826b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ea3d5d6b8eb4d78f8df2c5e3ba74b2e296c549dbfbdf0d432e4753d49635b38
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a40c7f61e5f5c4bb85ab7f3417f6aaf38e7fd83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:070102c6a74756441b1b982880697ac3cd51057f31e016036f333abc3cf34a57
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d0afb097baa96970dbaaf835bef3d7b0eea7e4e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f78f1799fcb59b60d0364b33f2a013a23fafcb4159524a935719a54b43ffbacc
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..21fafbc430fe4f6f2c4986fb45c1942affb3a70a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d8cbf1ce9c7b2ea561a27152ee2a380257b1c62c3812ee59678cfbb001552f7
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37cc5185dabebd0f63afb2eac26076d73d939ede
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84565344a60674b5a8cec93f4daf3756126bd85d99fe4e983aae54592d78e8eb
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bc6e6d208e9d037aad8207b30f26bbb6b5322b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:653669f630e048c32146f268e986fede45d36b38fd970f6e4751746c745f49c7
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a214c5e090d4160567ddd2b6af451d2f16433f77
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23462a4f7d46d2052b3afd8769b571ce696f4c1f36c7688d5e076d47fae7e116
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b994e750483b6252d30e807a7b2804d493dc8210
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37f5c7458f288889d1106ec5cc365a72c813bb469d7bb28a4106b346c95e0b74
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b3fa1cd1a108273e8475bc3073d4c0d278c74a9d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6138b421c0aa2fa2cd375f684a057cbab49d95435561f2c380168004c4a748ba
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f1e7ae30d8c4701cccc207c51aac1c210b8123a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f023a7a3a03272ef752055df71b2a6dff08c7942ef45e9f104eec289a063ac7
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b7263eaeefe8f9c26144ef64421e866321c6638
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d11cf67b8f9a51ad4990a1661caf66dfa2321ad9b928be207b10a485419ee51
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c08209846f6830da0e30bd1455303405794a26eb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9f61201a2941e8e86d3d8c049ff29b33a40b52bf43b4f7253c18681ce279af5
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b2a99516780e56e105fa670c8447dc70a0f65a1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf9d105b6f69248e9339735aafc6e866bfac7bb19e298e4a74fb4f224b369a05
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..adc6f7c81ae464fc0aacc69d8910362d916e5ca7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01fc6ce4a96c68adcc0bfe9e13640fc7f7b025611a0cb5b7dbd78cc9185788e0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a4ed4a7ea99c65005541fa4c166462b0966d41e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28356962ab09a4fb21373143f996fd93df4bcb1b22b800cdb389ad3c44777468
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aadf6f9bbc16e883532ec40e386e942825dadd42
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c28e402dbab4a69a05d33b8e85be0648740f36689f872426612e645eb6d07d2c
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d25a08d7b4bab79e1e4ba195f2f5ed9a17cc8d1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40010a3f79484116222c99912f86600274be685791229de1267ce5d311d5c85f
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..452a876ca83d47805af18ef20ae9c7abd23783aa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8304f0278f386b7b2d2102199f9a1131cdad5a1265c50a24b8785b13e62f1aa8
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab0d9c1d075c0d5d2eda044a78082bf364e7a918
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7c7446388f03d35b5d563e2a38510cb7b27484da7cfa36b8b07ef32bb1c3082
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54353ebba930a333cbb3907be5c899a08cc3eb96
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0a5acea71e4a03283a50935e6addf8780353eb13b7d2c215bd83415d8343cc2
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2b99f471bd81e46b5fd3548644b229247071107
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:338092e5024c87e190cf36dbe6bc5a966cab9c17e380a24ba525d8f9c389ae83
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fff2906c2901e28e20985a729ede38c770c7779c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f2db98d84bede838be60ada98ae0301cc4116e40aa9bd21783a4eb1b6279a99
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..97b0bc3d260dfb5a23b87aac43ac3f820c6d519b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dea2ccaebf80ebf09af448d4f94cb8510264ab491434fdd84a535bc59e64edd8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..826c79453748c7d7b445f2be5aeb4d8e79cb4d94
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de3f040979162938663213a39347137cda72281331f011781c7fa106c28ac4d9
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a010921adab56b8ad15ede514d017e6c0b6b6c1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07efb56e79477ca7b56f6ece79fc84a8fbdd232da0349aaa6db35464b541b0b7
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..da1d93f282b4f1c5f7bae7fde09b1179c106e697
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6903159b7ca10d5fdab1010569eddbbf9d7ebdcce5268d6a63d933de1905acf9
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2595dde189527c764bbbd92f88680d49fbdbcd91
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bdcdbbf008966ee1f7e85a58f3e1d698d8aed942fecc50153bdd42e19e15c2f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f19095e64741eea7e934fe4941e58dfda5b5f3d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31fc643f9eb1c2444d56886f51b40b61a0e2b322ec4fd98759ef2b13db1fe80b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb19eb46c4765262f49cfa236d1bdb50b8cfac5c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d6ccef36cd16cdde5604c1f79e984880d1e0081f60c0b8fea2a77e6bae068de
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dd280525653a9a6e11058eac79e782cee706c2b6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f9d6c78d2a87344f89c7913902ed84c748c2b197dcf1659a310f6a8ed469c74
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4246bbba98903fbb669737c76046e8d5175bfda4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:473969e01f0de454c08672151b06289a519c0ca0260bf2ffb3be0ddc3f30e413
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ccc68c5e99131fd06fc2e6cf58b15f593d245d2b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9533fcd90adf8884667ed1e65277d4425d576d92d48ebd5de34b982f6a37ef78
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a27cc8919c774328e65cc30c5b21f9fbce2df91d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c710c4bbfaf65431d80a037b231e23eb7a53f9da13d12decb1a182479fa46379
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..84a7fcc9f92fd49872577ce9565e48a4e1b5c3c9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:326b72dd664305450c421eb83414050ea689f0dfcd6b3f773deb9e313ca20a27
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b09636cfeab877348b7d4c72af4b8e974c4f1c16
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3ab90eae8c0fdcab9d690768a76cd7fdaf294c96df8faea508cc78263b4212e
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..75cb06f810469b576ab80232ab073d545bf2641f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d37ce2c8af4882b536ad3ed986cc06be4e54d468ebe4205ee453285748b2953
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..35134ce910b03c3fe91efe63c6d62a2212996c11
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd06a60746c481c73650e7b22a46b3237bcd0aa80dc00967ecf3acb7e43cfa67
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ac3b7d72e789f6fd023ff162485b96753184821b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9936138e446873c085c001e583e79a2a7644ec16f730128b349f81a3a3c69687
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..799d5907c063c1ea215b91370bc6fffd676d9cd9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:449af6d083146cc67574677cf676b7db3d7277e524f8c5f28e6173d78220d414
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1db8aa208c90c6b75cc29e76df21216b56059c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fedcf49318e8577c0dda27f9e41950784247f559264a34931293979e995d3824
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d9fe5db006916880dd6f16ca84fe545f92965208
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a382dd2b331f4be92fc45fedfaba1732171dd2596ddcf1ced3bcf9565c3b0b48
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ef3bd75ad8d0902c4f29017db85285fe049e786c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cba14e23f5e26e491207d124f996b9ebcbe75bc051f6fe7f05bbbf8e90ebf59a
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0dc829137ea4a632b5bd11e0f62b3984f460b53b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0b51a9502dadce457402f1593ba96b8c6be0cbfa3b714b567e9dc29eba2ec29
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa085d28d0277c12d041121d5f86e17afbdfb5fb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0b0be4d2c46173536b3a4b329d19d24e39136f6f5589a7893954257bab1ca1b
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..91237ec35fe51bd0558d35c580c441bdba353657
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4ad44da9e8b9d363908d7104f7747a1906a6d23e8ae47c399fed1cce435789a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1127ea87b65246cf1d989d85c450ee014f2e3514
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2432d56b7ce205d4baea5a92e2770b009ceeb2c903a1fd6221897f4627c99ba9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5674e37e2100ab24cf35021bbbe78c98d47c53ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f6d64d1b67ca8682c0cc0c8f9a53cd080473b2c4154348b2b62e3781b2155da
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.22.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..509ff6730ca229fcd4216fc3e4475ce6230ac9fc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de9cff23cbeb7e52fb6dcc38fbbfca79fd99d8b21a5bc19137ee6a6f0c698cea
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.23.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6da6352153a9a0e79e72615ca3419f14bc2f0c9d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:349d9ef696d95e7518cf90da3948b78060ecf5e6d07ed869a2000e0bdcd2b827
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.23.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.23.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..147e2f620ba143fbbbf8352e95de27a9a4d1f9e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b8526bac7aaf58be1543fe2ec71a90a198401b6e17ec2ffaedbbff06a335f3b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.23.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.23.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0918b4681c3ce222d491be9054e32b0b6b77f638
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2639f3df665e35d220ee13cc752334227f5297ce15713f5afbfffc2ca1e1240f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9db0944b90561b0c303982feba448ff61ceef3b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b150407b9d4e67789646db56e6c4eef30d0594feca503ffb60e48e93ba4cdf9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b6690279e3108ebecae31c1a230e8152cf5bfbf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caf277e4b2c63b24670b76677002ca344d169789c0a4006479665e48cd9244d1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4242d2272c3c04bd9669a0999df57a00ef4392cc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9271239298ad4a01382d7246b947578bda070eb636b1d406ba608b184682510
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9fb8e4f30ad5180ca7e6cd7c7fa3840a64cd46d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dcb8cee57dfee7c892c9c046386f82962a6dd65b48f4c1d9e38ab2306eb1c97b
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..89b34b198a484aa6d8d3e1a173342f288da03af9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae7249d8f90963edc3ee7804a2e89f38a94d2c2ff6d2a38ac61f03d12534deec
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ebbb5afdb6893be854f90fbbb7b4ebb73848c28
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d71e61f7b3f1d10a246902edccd61eef0c6570e7c5880e6b6f7556b0092960f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..594001523b11b5221a16517b76ebf28aff510717
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4724f23001f56bd05137d42f8300698ead34fe2fb2028c00e950744bb3a83228
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..444acffdad7427e9fb670ab9ecc3bf46b7595380
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb4261a0a40642366b67a5497b37a2b1342cbff06f41463bd7885fbfb5723cec
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc165da1608a5589f44d0c909fad23ce36f6bf9d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2ad52935b3e246e627cb1223a090fd875b09a968e6d2c5d90390860c686fccf
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2afab88ab82b626e01a5582f4d19182f8efe28e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffc0696ce6f3511d91daf595d9e0e04662e06b8424c8f0dc669e18522d2c9309
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8c881236d55d28aaa3cb4712111b4835e1b9349c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1dc07858ff650c60a9c98fe2d6ce6e3d7324af173c44195aec4eca2c98003caf
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e4c741280732314c10461dadaade7cbd2eca762b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1abe1928a0ac5fd2f419ded7f47049fdee084883895038f8bb6a06ffaca31b3a
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a266af33ce93623838cfb89b4695de93d792da3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01c0c03e271219d574b7451768848696ae39a292fa90b6291d5b0e10d67c3b8a
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..31eea4efcc4bb26600925c92ed8a114a9b13d038
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:352931dfe8f7dcba88d3926ac73947b79333ff02087df3d5ac899d165347368c
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..841aeef47d31009df22d02b68d0a17c0756afd50
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6de0cea19983a61d3c5d73ea539d41c864126396dd818a6ecd5cd121f3ddbffc
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e81482a13e680e2eff8d352c2a05d8f727a332f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f17db4026e4dd4528fcbaeab7dfa1fd4b733c7230b5fc679a8dc2a795a5af898
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e38d65b29eb476fd27e4d29e31611e7a667f642
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db89073a10b9a1e702d63d5bc096c6190498205b2b4b4c5da1394fbd0f88c068
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7fe1de867720efddd2f43e699828aa312c339b38
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0512692a926259e87a985a21ee2dd2f081aa026ccdedda6f2802bd0d83edf52
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a7ebb239a4ea42aa371648c2fb1fb2b00bc1e15c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bfbdeb76c824c269e9f45afacf60b640bc0974d8df495890c6b414e2dce8a46
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8366254a91b51a84978f6c0b7a0e04013c44edd2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:403646ac881079c32ee2c3b800682c917ae14e02400ef294f947454190d545da
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99bbfefe500e775e5c82831c3b6709f5084b6465
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c8dda8ffbb53e9096f8bbb7d385f623418300ff6d5b5ee4bc35fb9da7a6766d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9f9c710fba45518d593c05e70a2112f7fb5cf28b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e9518b988d0d3cc9a807fd3369c6b7dfb639cdca22f54e6bb710d4351e0e0cd
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cff7a0bbeeebf895142db034d799b107c29b265b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c8305f854a9431f71dfc693540dfcc6907ec5aa5576de5b99ed4ac14548012d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f52039df54ca94ccd8cf4b3efc6638b1ff2ac73
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4930f4a3538bea17977f484a6ab6fe5a38047fc3a277cea7ebd72bfdf9ce042b
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5b0d51e0d0f94f6333d974a8e751d4404b7a7996
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4bb0001c93ca88c90f69911c606ba97ecb4ce5c6d5ff2d3fb4bee95f95902e6
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4841e6625b8905c53b676888b81890a5753e5cfd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe37d5b53f3671478938dd331cc0cf1b412e1f6d498815c09b03e216351b30ed
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..599164e68812c76d73f4b7b9a2dc82a3e4b46783
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8c4b527ae08caaaa73e18febd1918a100ab8f88688be215433004ee4ec5f35f
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0cf707449ea48df02a54f800a3fac6d7da5cc03d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d4e3edefb53ee5a764e9316b28c7cb238b62a041d6b08aaa485b0d667d60887
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d972ebb901bb743e93650b8b4513b1bcf52941a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70996d034db0bf9a2e285d77aa08a5fbce2326d4127f9fc4f49c0063f12cafc9
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c352af2efec576831c79494119998f42dc20860b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76dbcf73a89d7a59a92d933cb60e02d51312a95f08da4b7e6bd14b2bb485a7af
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37559ffc84088e3e64c1c113016937f37b47c4c6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9f494b9eabbd1444a2bc7a51ef79c38b95eb121f42c386cdfaec083c69c5c71
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8463ea395ca3b9a49c77b9b4ff340ca6fca3764f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e617562e94ff366503d98feb3e6fbedb0f73f0dd05838c0978628673a5d7ec8a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cc35b17f5a4daf62920f3abd83f01cc7bf072fa3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30e2845144c56dbd154b1359d75d69646162c66beb8c2646853bb2284b23f2f8
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2037015594c470d4f7515634a673274b5fb5905c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c3c91cc018ad5e3c2724f4fc2a2e14bc4668fb6eabf89d3e0cf0b28990d05ef
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ee5a7fbebcfd8d3efa6a3468f8cd87e4a91d803
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74b56489f6a494967d479a413fd5a8f8bd43c16e741f683df1d05543d79f54ea
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf3e15d4c5b7c160115fa1d7b96d16aa8e051148
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0f19ad96e4c82f33c8bf51335a3dd89be76acdafbb3a5d1589bd96a17870996
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9ab8e2a863e74f460476d441585f83751201dd97
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a73617dc2435cc845b12f70c2c02affdab83548f07b8f5dacd4225bac488167
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1e0d7112f5d5495324337e1cfc382e938e8698d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b2cd97169354632023e8854c0efde3084c44a7e60eff5e2a04293187da02179
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3537988101dca170da8523c147f930e1eb1b827d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:815971a0809e20d97ef82e8c145ce977c65dfd2f74d955616a29ff89a9f94ab7
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54af7de31f8abdfe5860df9155e449b81c3667f3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d51bca14da961aaeb508441a289d3020d10db6eeb650c27b84737b0af7699b7c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d9707bfc8205899d22b29135a7681c573ff62ba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1a4a9a6667bd66799b3b00af727c82c8e204eab5fdc800d92f2bbdb75731271
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2ba1784278f61f5da827465ba9ae79937b6c17a0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52c7be83f22f7bb2aea256a388c2a91e1c9f7b0b41e7fe7214e0b6601e070de1
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d4864e1e8c6d49280820517e91400b484ae46b9f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92a508fed18d93acf8149991405b89aaf3168a7230861dd95984f4e2beeb5836
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0a3930bf74fcdd816a5d6c232b12ccddd5c24a4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:383043fa6a998f4021c0e7235114de4877b89d0db1d50553a462b5278182dabe
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6fb931c6a6eecf85f362153f42641076f4529b52
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46a00db358583b2347677ba8699acc7c1e8a26779e6a19e9bb80d9fb2518c5d0
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2fcaadc33d7e4d5b6c5b29a6bd58e870ceb9f1d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e26234b6526bdaad4a701b9e90590f4ba65cb1ca1b14d039d068d5aed10a423a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ca05f85c98b982553901b59281fbb48ace894f73
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c57d5b5d646d1e178a11399582a102d8ad8585b4be21846d6ebfc4d262fc0a29
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..baaf86727dc60fada69962409de9db0a841d903d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de9766ce1e924fc4cca9e9368cf8ab9f879f059539279226d7e456d5845eadbf
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..258da46a81a9eecbddc1d1a5233081bee3baf17d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d22ab8bd990ac631128b15de10ae3b57a42688f474aef1ad25f42e91a6bb9875
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d8e7cc18dd4ae4b9b9f7f2e2d111dfd3e63d3bf4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37c11c3f90ca941b21a08187af4f49cf27139cbf5bb470fa9fc4ba899c77e653
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..23a50e1f373276cf59d30b189cec6fa3e934b419
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9b97c8b3315d0d87fa375f124132a7e2593b5983bd8e90dd2df1f6756d6159d
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd9ed182040fc4e68318defeba931ab522c58387
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af013495a16e522b815be4347a02b3121805712a4f059fab4ca18266497a1cbb
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d0ca285fb1c2dea2e1e36df7374315214c3e11c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3940adeb28c2164c04c83f1c5468cf310c0a0f9dde380211886823e886d870a4
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b190cec03dd49782230d0975deafb3bf35c5047c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06e014ecedaae5d712b8b5ed29da44d8b379c6a421630aba1a8a967ba48ba5d7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b9cd7c2f5290d703d6462f3c556e2346d196dd8b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f578f75544179f95dbc7ff8721d975434f9d3ec49773c6b849d2216b57475ea8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58e954933a45f1439ae92c241baa57d4b0256a4d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c799a7f2ee5eafebacae8bf2ee0d9a78fbb9c6e738605c30764a516a5b7d6c03
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.23.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe745f8eebc29a05d80657049bfe06dbcb608cd9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd6459119f4a76758506ec50551db1ab26aeefc2194433c1da9321cdc0d17bd9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.24.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ace5f9d36c345793cf27a2280bb7ab03b5cade09
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2b5f474bb896d8663a44bdfe871116f3bc57cfaa47f0236194511042342a3ca
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.24.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.24.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..08eb06ed60b31318a9a5de5e3a61935ac0a3d484
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4532ed48a6a2583cba6e6d727ac7adf1ccbffd3df91c7356fb02f7484b42e260
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.24.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.24.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..be8fd5d925429de2b78dcce1e06b6e84bc9ca1ed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb01aff145acfe3d0009cca785da4eff035cfd42034fb188c4b4407ae6ab6b82
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a8efc896fce9500c7381d073881da02cbd510132
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a21ef93dedd4f168558b93dc037f276cf37f50ad6a3cd5e86145d17ea55f4614
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..febde4613123dcd46e4bf7acbe3c5c581ced3c06
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29f6ca2d59fe9539d45c0c6262f91f31c48d907a872a2271dcec6dec2c56cbde
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6971882cdf6eb19d872d142a9816780c887fdc9a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73abd9ef96cd31f4bc1842695a827d9a649e48c6b5202d5df77a8aa72c8e02b7
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..60046c51a15ea0741b9c8cdb625ce5f30e58abb2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7de6b0f63a5e0a7f837306553ca8a7c3e24379a10c19e1a9d7a9d790ee8c5b6
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa2c72091937f5ee1cf6ef2e393d7a25f3bfd253
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8f29ffe9c82898b7bcda3188afb0b19b1811385ac39dedf884ed34576ec661b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..55a6abc77c22ebdbe75a3e0d9e8b70f0c201c9b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e49b0eef8255314deca364b2d7caddb52f8f1915198529fd76f501aa80c5320
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa93726253666d179dae7977248f606285a16958
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d82f3ec86095eb2dba5894ce9adb5185beb9a51d1499f4294eba1529fa04e01a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b05c6f3764da76dbc844f528fb06b18aed605bc8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8e2ccf630f5bc395dfe48915a8679e57add58599505e192b886610b503c3ca2
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0f5a9bba2dadd8a4b2d212c36d7566759cf12b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3e84596482349dbc4935a82cb597dbed991b8c060611dd1fffdbf548c65ed9e
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8dd6c6bc8f46fcd837c572c277d2e50952507f7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a23a5abf0882b5f4fa714c710e044ef5a698b2d738356eadffffa0eed25be3e9
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1c45ed77be11a9984b7510c9b5ec1d8d296b61b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58fc1e8c4cd728c04112da4dcea6037cef5046e5db2555f44c559ac69976cb7f
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..88968e78d400afac3c875d45771e2cf07d262f8a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fb40ec214dbdd03747b017ca6cbb766eaeeaed6a0dd8b79283e34808fc103f5
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9ae3da042d7270f4373769d3448ae7936f7a8ecc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6769365dd0c56e0a8b600481b09684d6f15df112190e058ece5fa10caf8990e0
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..928a953dec1bb5e74ff22b8b760791db40fb75e9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61fd4843b190abd98b3d25095a4232b0b6550c115be3aa25a910e9fa61289d2d
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a6f7fd0b251b0dbef9505ecdbc25c2642a6e73ba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8cdb2f672c1997acefaa06428081a0912cfefbb269069b31040813b3929fadc
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4daaad9beb239d027732fbea29ef01b3fbaa5632
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3c61d8292d67aa3f6689ae27c02dec219fab688d85d40fa59648818a287f513
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..574b170d978f3d0185aa095c2d35ec8180e53be7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cf1eba3454283b9e80707405cb544ea9981c645c10ebbd8409c57e5ef8be526
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8bd75a14a41aa09fc23041b9dc00cb3989fff496
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8af2fcd297fa49152533dd76fb37c7cc8a9ea5e5d45cc80c227d2a4fbbd4a2a
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..916b2bc01fd5b7b740bdfad6451cd8b124993c0d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d3db54479e044f4967559b4a57f0bdfe6418984c8ba830f8842ef5486b3599a
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..36e7716341fbd789ce26761a1864f832447a47b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6199fb1d0d303f49f3c52e262ca8334b99187a9e0494c0958b31ca6f67191808
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..13e7fe022c740ea0a0411c30c934169ecd98b27d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b96c1806776bf439e420ee13ae9fbd8b0e3c275c9b15004bcc15df493bdad04
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e907eb2db795fec09a67fd107cda14fa4206d819
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b03e8241355483e975c03db31a20d6f87178a9ef85f19208b157867c02557af8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b7b633839f5cc0ec30530eb0b3e01a1d26d6572
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0797f5d3b1858b1b9a82ceced39cfd32844974426776ee8e4be290909a3b816b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..84f612c04211b728a8b23d49de1779e3258e5ef9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2099cfe0cb8163dd38963072524b9062525ab9eb02ae1de283b88973cab4a8c8
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..82732a14b4d06558d282cf6a5eb438a15a4cf907
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96abbae517dca4270844033348bbc71d7f0f601c65af361cd5d4e8b46e760351
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..86eb3ff82b8fb0f37b978a434688a2467ef89217
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efbcefd8245739db10a50f2d4dbb1fc83ce7c0e87ac78f1c0dc17943f670d8f2
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..209dd6271b2bdc4acabf131e794f6d5c0f28ea9f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcbcf20da408cda32856ce5a53d3638b52da86e6f399e629d3d00ecb39313210
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d053363b9d244ae81d7f8f2719cefacb15c1d51
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7850e06adf1742728320ba24ee81e1f14227bbca725989c06070c9184a9ae372
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d16109d441eb7d16de3c19a669eadb2f83ae5816
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f53d714e3eacac5a87bd338033b74352637613bd838fc7984e601cfff0cfcd26
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..df98f43e54ce8c09bcefd2ecbde21d45e5cbbfa9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2029258e0d9489bc415f44fa4dc19bcfe67bdfebe1f479f8979055ca32508c19
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0f6f9589c19b50e7bb1d816126e276c7bed725e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61f744f55780f8a2aa75bc9dc739d97dc8da239e9aa1ebd503b5da83642cf27c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b4e130889cdff81e3070b12ee3c760b53fcabe3a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58927511c9d30d29b2f23e6001142dc81469a61550acbd7ba75ff9b58761cef9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b7bbf55d5834bf1c90ef3962fb8e29fd3c547ec1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed9eca0fbf651737e09c6adacc60645ba9b7d22b675e57b3a66800e2f9e45063
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..03b176f1db56d35d19695faf9c294fec5b914667
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:137696c2d149d55fe75361c32e51ec7e070b6f67ff1cb7c319d417a824d54187
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f35c1f4aa59c2e9bfb4212b91981985e63a22bd8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee7c6682767e2558767756ce769e4b714fe453e37cab4f8cdbe4aadbf7c4492a
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..068bbb055b31f389a04c76bb68c1e4ab98954cee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:932c126ddd1afd6de79621c4843c21d96ae6904faa55c1049ae43b9f809e790d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f5513fd6cf875e9e09cb5528ec7eaa70333c711
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e8786309cee44fbe4a9488480eb6ca1dd8c340899b5901c5b379ec840ce0a7b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2fde419687acb74155852a82cfb8b1495845cd4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5dd86d49f790ab1a39b4a8d568fbfbd29a62c22910e5da5bd8ebb3ab1ae8f48f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e924ee628c26ae0c7c69305c58c6bb8848a91b55
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8b230a9d109039070bb478ce2f7a499cec871c38824638b3db1248be879fda3
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..709c3b63329f2cbd9c80bf54acae1f3f4486ef8b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:945433326ef6b709a6ee4a3aeb62b214189ab7cf3df0364e6276be869dece570
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7696978383505e797528f54e9395f3ad9c6789a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b41f314a11ff5414cccecb075bce4a561f9cd20d3a4e090e02198d352d8cf4e9
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c5bff791cfb229ecb02072c41d7a7ff349e92975
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a474cb51358ace68bf67cc70aac73df79a6948028a1daf95737b9dc48fb16179
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e21b13c67b1ab9384ac0c69386b9ecd166d9024a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bac64cc7bd228d8e25d84b3e5050f830607792478ffa5eff938c3b30b8aa171
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..69003ce6e82d1edcee7d973168adab4ed57a879b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:351ef455632b2569fdddfd590f368eafb75499d9e28e8b32f50291b4b0995a55
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..338c062417da196dd146e421c596e7dd63a5a939
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc150e21d34f4a06f68270c94fb50e991f587d882c0239714aad5b3724635a9e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d260994e2f0190e7a7710c9801c9f4652b854ae
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b31d12d08c72ed972087d4c7beda445300cec82ef514aedf1caa86cbff08ba9c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02cea98189b2d0eac3168f60fe6a102a1f697ae5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec4c57fb468f7db05555cfc7dc7fed760c1f5ff899c1df7896e4032042768a18
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f064eb3b1efb331cbe66e91911859eb0a8ef2b60
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa22cf96050a489dcdd34ba24f98c1307344725da51999bcdf269c0001f9fe7b
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..88c0849254454ab14f8e57dec994b8576351f104
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daa52628d74df3cd7c47a018fcec5895ff1149d2c8327c37902f87f1b5afcf3b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f33fb509a85e2dbaf24400ce1cc5fc1cbe83ebb5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:949187f611af0650cc2de4eef99889637be1969c3cf3c191f4c9796d070629f3
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3975245613877e8c4505547d9b82c5b2d984f124
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:964c002ef2fda2969fd9de245ef1cadb2a00c39a97472539f1416fa772fe7014
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..edda3c3f29b14275d0859757d4ac26ac91b71fee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a3580910e57e5db5576131e1f692854eae5b33b3c9acee5e2f738e0e8f2947d
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..649b1da3c39fc6c29d06bc6e34c7b09aa2e33ccb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96de721efff17a1f62f5c3fe89b0be91d37dca2e6875e82f701ececf324c2004
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..14be88b69ab1cfe573b8891a5ac688d8ba628f6c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f415bf0fc081ad8059cc411794862b9a79fe29880baad824f5992de615fe7697
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..223fbd1b92683101ed4b8b7367e76a3d2544a36a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e52fbd9b52bf805f95b9c852fb2213cf2f4c7154e0e2abfd4aa88c0e044cf428
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c06cbf1c9131dc036727272b6b2e04602bd6a16
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9465d8b5375502a1b64471de5b74d91cb24b37333ce2899db214f605a3d41855
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.24.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b37da81557d2bc70175b647e0bc6a8d110d75ae6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:465fdb19552ce75ec40f085f526d5f8ba2acd49d4d72228b132e8a8ee2a4fc3d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.25.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc98bab1870073161d17c8e26a8261523d72a8f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6587cb7b1cca058f44bb36d13fb1fc0f650eaa41b7cc9edb0e260ea18a43808a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.25.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.25.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b1809b6eb1144f7316dc9976c800f8ebb61522c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f759e2285a917a4e1bbde1b1fddc87bc48c18e30cdc67b4f03c6b7ae88bb36f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.25.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.25.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7056bf634e71f4ad7cf83ce090be9a98333b871b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4ca39adc3668a3b484b0e9c89a41054d1cd9f2a1e505a283460285236887527
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22ecb2933e69b0df44bdba4ed60f50a884a520b5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9e5914b635724672c03e5b111180249cb3d8e7c1f80860f49ac047ef822e393
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6d06406a5d38643436b07f2ab545ea1f804539c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7369e325362de35426fbec6e8138a5b816191d18a80fed68abbae006e3aecc42
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2424f03919d96a705c266f82b09ee95eeecc6834
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a9516636729f285630935911ebfdb372bfe34284c47156c3aee80e12af36a76
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e6a76548a3f5a2daebc6462b79cbe6cdd6bffb9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf54ded1e7f01d88aa7f7f72353813d38c98c6f3c77fa33e7bc6dfc316ed6087
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b069255773924ddcf5dd2939c10b22b9fd7271ba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5ea9d60dc37538a8520b9a70c01e3c55d30f298f8a662e06c78f42c01525b3b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6ba4a6b9dc1f6a98e4354a48ae2e9b5aa161aae
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6415b5be7fc93243822d1653ae1faac5922234a0fc8968fa0479401fbe030591
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..28e854d766be505e2aa2ee5a816534ad08967e0d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fde9a6f33d6253e3e0894878abfdbcd28c850e3b5114eb63c296915258c19b8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c69815aaf3359c654996f744d51d042e95b6992
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6f20ce26ec08f58f22bec81866d1fa44d5809ee859ce52b3232cdf8cf8da1fa
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fdda0fc53580a58133dd77d92b453f1639eedfca
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:303dc8be799d483afc6b07bd8bfa27a70a87d46060c465356bacea2882338019
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ecf869170ed30fc34c45745f19c178d9e21ac8f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:479efc3116e7c2f968dad9fa51747698cc214f7d9ad36b81cd4aec3a2fb7f280
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2970575a1a8086f112a07df6e53082fc90f738df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08b9a52adad961501f341d4ea93e1c642ef656dfd65d32a38e81ea122cac3894
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02d89fcd4378485a2e33998d2275ddd2f585fc02
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef04d894a5919961840f497860e4a9aeda140dbce1d2a364a64ac4e0b5f79c13
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..411d4fe0ee92e1b7c44276e1ce76ff7b1403d9f5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43e0b66e3e0d36e1ec70c936a7352dd1d236107a80819da29561724b793689b2
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..12fc624387077384d218e88399bc2a191016d51c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d363a492583099bcf00178e33c1e12e6092a5d98637c1a9f1d9cbe371aa1915
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..483fea5ba652d391de831ab286a37a762b82e3c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd2cda73d29fac96a85f0019af501979e1f90cd2e879f5329fbf6322f700a40c
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..af27fbba0524f9602b43be7f3d5f32def047aad3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9decbafc6854345040d9b1e6f78d971f8dd19605c4d3add02bf11578702dfd57
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e26e770f853580ebe4aa710ff3d631efd026d226
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:164d70e4f90b87e63876058ceaeaa5cda8f51dd7f437cdb2305d09e1c21e0b30
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b9dfe6f05417d532c9ca602202692a5c0e407471
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ce4b5f51c5b6c94117b2c7f61c5a6bf45eabc3d2be64c94212209b19a7751ce
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..105bd536cf5fdd5550c3922ec8d0f0705b08dbc8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fae5258cc9152c7b3fea61c3f2adaf181d2aa9b8b1db1c4e6ec81bd5d893b26c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..136b6bc49cce36cccd7140e46179013be03cc71e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89456df457f7c4402bbd3ff248828f010f50a136949d1df3d1c520f7ef20d9cd
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4feb50f6b8e735b315b5eaf5121631b307adca03
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edf76c6eba8cc68e3bf5321090ffa009dc30835aa79152608bf31bb6e395159a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e52a5beb1aec19bd9eebb40f4eab02f2c0d93fd8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dab37d720c90fde00128705c25f3eba10a479e4872fe7753aea01cb99773da5b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b97bc05359c42a5047f5868dea2ee07173f0d13
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee54868cb9cd85769c313b6ca3233f245779ec2a91d04a89b0d9bee6d9f01a3c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fd2e33672ac034860748d7658804232cb96c5718
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99650a77162933cbee3df085547da0735bbe86a647d791591e0262bd4877399e
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..980be60086f24e0aeb3649f6308612397edc2e69
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b7b369b2b8feec0e07e79a5eb33e7f698920b0522c46cb3a23b1b12819742a6
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..be2876afd40c4b50852ae034b940350c1dfb6d4a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d2a51e85ab9137d1efe6e9bc383ab8affda7a7d854002a45e0844487aa87ed2
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0033394182057e2c081e4150901eb397d911aa63
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d59a409a748a0264f70dc1f703a8e78a8ed6daca57554d9adf6d391ca7077b4
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..51f804514a0ffd0dfa4d701d9b9036f0a4facc96
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8313880c87a65efd8501d3b3c3add1900f384d9f95b1897c9156db967474ba3
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..95f1d941eab07c34093dbf642d5363bbbc5d16da
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3459aa6c4f036938d13e1c39bfe8754719056ad66d474f7174b98accae7ea8a1
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3519217dd3dc4ce43745989724b0fbc1f5286d59
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4c94d6d6830128510f8e03cf6ccf3f09dd2f934fbbc3d4ed30fac764990e5d2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..26b916665b3b812bda2f6ad170f56c1a7a4ac463
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc198ecdb3b474436c2815253f5390bede36987a21acba881c0801ab7db20882
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16765382a00e29502d1c54aaebd6d0cf3a6f1cea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89731f4bc7cdd7d2e4a74f6af108b0fbe05a902b649072bfb5a8ab0787188de8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ae24bc707261a13432f520322a1c24214bab816d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73bb5c19297c6b620dedd02c3dc65b6e325d2524165969445d63e79472fe97d1
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45f9cc29600e50ead1744eec3fdf597f4fa6c312
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:685e90bd1dfc985e6dfdaec515376580d73f0535a38ed4e85c7e9d99a78f4bea
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..26d413ed0ed95583cc390c1811c155d76cf024df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6a2516e9106e929d0f25274a4f867341ff9e926517c10ffe5e1c5f1871ea71d
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9cc58690c846b22b779fdf5ee15f23c2323d19e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75fde2c3a6d0d7d075219b443300cfccf6cc9e61bd261413d6f494fda42f6d58
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4af1725d019272b7e38a671216c1c3d02909ca69
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5fbf4a19a490de83e9e53ac35ab9f1d868ba4763446a9223c08edcb9d532fb8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c4d583b9e4efc65506f3fa9e3971e05e8b3cb294
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b383bc22517403aad35f494014594edbbfdb5abb523855a26cbeb21e3000cb2
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..413b218a4110c7fc0b27d617febfad0a45209891
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecba2a80c6997af441b95c9fb38afedeca99cb7917db873094220e9c2583573e
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..deaf895156582f734fe69ee56df8953c968e506b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6e7ddf5f2882c2cec61fcd4b15dcfe28c31aa09c3c9140f32770f780477f4c4
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..feefe8e6d6ceb908e42fe40b173f9c239f018869
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be3b79b7ff5e057d8eb4a5cea3432835b0eac1f41963766476c36954a055c4f0
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2ce5546ca407ee642cf1013b0993d12dbd9d8b0a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:602a3da921eed2164cde3e9b167b5bec8ca7d809189759d9d90bfd25ac5ae55d
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b0c845234eb563d025175cbc33b406488ec354b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60f879d256c9001a5dad201a655a577a52733cb982ffc9b5fcef6310dbea6130
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1389c0125ed2bb08e90ba55b63257c760e69a55b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2f56abc461fc3b4781dd9f5b271f456d720db4d0d9aa36ba75fff372458d277
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7227d33a07bbce507e2b2847f6b46a70fab65ddf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:375432140b83e0176d5dcdf85ba7802a79648976bc5e44c766ef0164723fd546
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e95b6bfad3ba24f7b183c09db2a06145cebb76ec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef310022561c6db4946ed6fcabb75af0a22b1fcd129bfead602b403d7ddd738a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f46cdeb1ddf47e9fbd32d9508a372f0d8838c609
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2685acbada95d3c5313d4f5cbb1d35d20146849f4bda082e8f3170ac9cc6c4ee
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7c4bbfed13d1b9ea8199a68ca478de7beabc22f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80a8d26eecb6736f3039e7739f6055c2673663593423cfa08f3c7feec2923e83
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f681c6377b5110f64784008b7edc592580aabf7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1219a1ae96fe5a9fa63a17fdc3cad5c89c0254022de03dfd9b93dde4be9af116
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8c1af1d9f0bf7cd6c497bc4f5628c7b6880be3f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40914c175c2ecf066faa6cb7ff2baa1e07a965a0648de7926e7336bb2b5cddac
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bea8dc40f19c4b85f2ea3576db62898723ec4475
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4b3aab31b78532a3ca9ffac21f53d8fac9305429283463f4c195405edb432b2
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ea24ee23375e5c10197ff12491c5356f96fc59d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:640f52b09c5135a6b69307c62aa72aeb63a480f6b72224ceae6b4c3c3561b9f4
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e40afb964b972569cebe61b1df3110a3cb57a2f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c06ccbb5f48d49beb165e13ab918784bc7820803f6a5c8e188891495a87541dd
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d2d9145a484a72e04f29aace21c1f679ef36973
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:404bd89866779e73e9e2637f0e7ba8263749dc335842284f0e42f3c973246622
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eff7d7f25a4edab0e24fd9d5e70dd826dca58ccc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cec0fc99fdcc56e93e0130667c076cebe3b4c0311567e5126311c370358e1914
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c6c57a868f08bd28481c5b85ebe7ba43af9922b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5686e7d2d61c74ac000cb46167f4705c979228365b409f7f694d0b1b127cc9a6
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.25.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f09fb7a281852d5134469bf7b488b2ca652d8b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa86990c5ec6f187fcec2569efb930a9388c8fa58bbad544465c31417169bb24
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.26.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b12b49bd84944c6e4f048c6c46d607d8b9da9e8a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:403c3b2bdcfc000af63d42bc6412e4ce2ff05bb430c77bd1ebd8bf12e9364d80
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.26.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.26.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9aa679c8fcdb7b3ed890d87ef3dc0073bf22d031
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5fcb47078bc0adfa4a8d73ba52b43269e6e3f88d3dd6126c79a0f204b9769f0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.26.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.26.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..640b3214ec9a05fe408feaa82a6f25fa438bf678
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26330ddba15e37dd18a20f3dd555a51bba6762ebfd7bb7b2fc2be9ba714ba3fe
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..edba22a261792b4e84c3a8aeb09542f917114682
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3890f6d30721885478dc1a62c75d18d3e7a9259640f477e2040928c1fba50aa
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..749be9e644f0bc3977c5d2a16ca9936917a4d3ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97d0d949e2910d69983d9a112e82ca97722062ab265006d80afbeb88fd5f68c9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6ca41669edcfa074e63d6aedc872967d8f9ebb8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a083f184c956f1880c3a097a71d2d9108befe12f1a9e7a6fa2083e7a056612b9
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9787774617fd043f4ab902151d64153e4650cc14
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f2a3c40eaa380311693b0ec8086030cfd684216942ed34709d18694c583b582
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ecb7174fef36ddb906c5cd31f0f845335f3d2bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b3fd3919034822e5c4d28551c9a88982ae5bd9e364c030e3cf4eea11c908696
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fce32ac5c14a050047c881e57cc7c8e6b0ee5147
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:923e69906473deaea2553222f6ff4e657e58ac6fdb7f94a848b0a4cb6c9ae0bc
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..422d3c287676f69732da0908fd2b7fa1b5ffa88e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77bc5de08bcb58860a41b050dc59dda3c21d7ff3d10941a8d930966025b0516b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6ba6209fe6c681af31f72404f685306bbc17f22
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d971053e39521ef2b1a0152d43e575c3601e0af7e09fcf13be1670dd7d7b0e6
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c8b48023e908316081d889988ffa9ab4524c7958
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:048b0526ada354614aed2d2d593c0e0be4567661de1cb81a47c021e610a345bf
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9393db98cf475e6f0ec27061b5265c5b99de867
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b340ed661be38b38fc17a9fb8f9dd61ca9df26800d6a44b1a2e36034589eeabb
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d4536d98ead2908d62c1e0faeb44bafd40c0986e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41799e39f15b14d9cee0395942f686ba77e97fc9a8423c52cfbff9fe504dc621
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..66a5cbdcb6a3387fc71efbadd3153cc4de976583
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdd1b2c84d9905ab025b82233418267c2ddf524fa1c7f6692f66ba29c682feb9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c65c1c171bc3fc6d2a99a06ee353c3facb12734
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e380212e065795a9098e08e93a3062e6ca2cc7d19f69be3b52e97058aec0630
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50fd0a58a02b0cad609ed1f99ca48d38c3544b26
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d3625c7a3df550312a0754165773a279b5f07eae3e958b847a7c7554e7affd5
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..90d43f878e3f191baf3c26c4392535c1712c87c7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8abdf07ec4178479aef2181094f755ddb9f3ab409d59bc7a5fdd91ff7da5adf
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba0b73cd080a72a4658ff22f57d73809f02934e0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04b0c7f3a01bfcc8cdfc41e9a533a703f0b0505ebb63ce71b76bb4fa34303876
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3bbacda3f2bcf2ba33f0c59df9967de825ac5bda
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ee04c57a9aa5cfbdf67573b21d6e946a1c9488a158626e2b6711ba318a2641c
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc4fcab8a678b22638f9eaea9a691a9a095cd32d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9109f235f9c86cc0a1a02dd35ca2947b66274f82bd792c9d25feedf24ce286e2
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf9f27238fb899e3c1c2899ec1270e7ba690d9f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e69ad6c6f1b6163c9cdbeb449a1101197c3b84ce7a3f9c748dcb17964d05e79
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..515bd8b5d38adbd4521791fc5b22f1af0ad53acf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24ca021d5193b8eeced2179e072e260d1f98e6593c3e80ff5e8e6dda744c5c9e
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9fb2f299fa060a00279547932ac7c3c54546b036
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0452070e5b728ac8717fbad0e0e262065081786f364a198ac03af2da4c65ab03
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72b7c80ac8acc0293f4ca67b9815c8ccffc09efe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c776162b0436737f8ce981e47e2423edaa50e7432ab681904d2eece51de06b3b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dfbb034dd6b4a3306e66aa4aff9aba8901e0f71f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e77df5979a8040db0b996397d64112ff47ba83aac75acf1db2b54bfbe2fff0e3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..616aebb438c32064ff4c832e6ffd67acd3726f0a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1501925676943b0e896a4574846f5fb46b62253882f44674c6dfd5a36785746c
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a7da1d45aed414ffa2d26aaaadd55110fa484e4f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38de27cc5e8c17c618a44958ff92f6451712a2d4ddf2d286fdc2e991dc2ece6b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2adb1ede817e3a8965845cb6d819d110b1d7151
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:133228bbbb8c938c56c8c2dd57e74855b89728c0882f33b587feb1f614112dec
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d9624a1a37769e55b848d15f01a31ea5b444228
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e815762f4018f06fd589530a87781145b72121e9be06d230e170019dcd4365f8
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3cd41007d3874ec98da37be980cbd2b8ce87f920
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45f298161e7422cbbac7e9eca6d9adfd8e61075351df430973365d885e7b3829
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e0c413e746338396171467e37fb5ac6ad579897
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea961348981b1286cf0761726a7cf403128c87937407fc94eda7ec0c9d803503
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4da3b5515fee97a5a7b7e9b4765306f2b7f29ff4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5ae24f2688a32cb3e6f8268e145a88719738b3d1eb7695eb157a8da6cae1942
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64c1eb5ccbafe50e81a29184d5e5f083a5695233
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca5ecbb785a2738391427fd9d6d801c8edad114c34e8ad272cdc2fd3d1cf779c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..df284be2327e65951a22de55c94e1fd80d59ae34
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad3b4c768a79c560271d26c280a4e884732cd91d2acdf445a40da3951beda9c1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6d2dbe825cb89a59e20650e337e5ecb0fb0f3fb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5e9f21a9e2f18daa2e1d5396df9788a824796800f6c32d1f84b9611de3bd405
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8cadb4be484f4dcfaf23eaa8c4a8a887e3d145c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:727ae97aa83de779464b4fdecaeb58ac586c338d4e806e85aa063e85a636b7cc
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f4034ea6afd6d4faea61d3e44bb5cf77ce1b85b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae0523d4012a9ecb990eaa40998aacba19ecaf10fc6250b6e93b87f4089664ba
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22da592d2982c5d165016dae79a685547c9083d8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d8fd63ac60a505ec5ee1dafc4a4e4b2f270c51f93e803b047c3c913a99ed2eb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..388505f0f5fbd5cd4818f1afa79f8586d78f14bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a38e70948d360a4886e54cd4b24ccfc4ce541e8e6727eab1a7a59ca12514f09d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64ada35228b99eb7fa5359cc117ce72ca4389587
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6fb0902b5ed662bc862a3d628111c44c519d26d4006c908c0831f2187fa4f6e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aaa1beeb9390e83a2756065a046c3d05b765f9c3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f6aa9c3ce145da1aa3d4e8125bb870623287a64ec335438936ea26e4dbf9e8e
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6baa6b21970071a1aca336d02d761ab2875a5996
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95497541c7a4a1fe44ecf5f57cc33869c82bd46ceffd55c6d11a90b2c0df0386
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6560671f8f7e7b2a10b69ed4e8b22bcc8d4f02af
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7672b0607a9336884786ecd716e3b57f676c6e721763e03ad9a3a90c4fe7bec
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cce814ee035fe51cf7894852c0c959c1a1b7bc22
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cd1eb79c0eb705302bef19f45a5cf8afda67f60971e2c2707abdbaf4bdcec54
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e232ca6239f5f8927076882300e1007d9eb340dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:969d8ebbeffe8d4a0a62d1da1eb6d98d37b590c61b8f0b7e04fd7b669a95aa6b
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..88dc5522a04c7e16ead07b594a0d513c82d46b00
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89c52e7c9ad59c3f46d94a8140bf296e8bab3d1be85dc6a21bb2508e19650502
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea92d15f484d0f07e5e7b69ff9fec4550b753bc8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb97136e3690190f249eb946d09837e3be65c0cbf80f00fc00c2725a283a4b69
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b461374daadaca490cf95685ae2ef173ecc69af6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c60044e47306700a76e0ffd6aac50af94a1d411c49b470a698539dd80034d30c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..705ba9747af1b13feff34f1af35e6c684a034f66
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a998d422e656c9953bae5c03a822ac806727c757c83bdd7db22b55bcf3476f7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d658c60d292e677b326ae0d39455a30fa58a9906
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19d10d977979a399398ba476d15e3b8486fd46dde34dce3278c3784b0e928b5e
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..392c9b962156ccdb17cdd816bd16b2bb56a7da68
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83a114c92ded83631ed65984e5a47d004d88079b6a4a2cc6a0922cab3f299394
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7929da2e5c7352201ddbcf982ca45b6f1cdef3d2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6519a25933d980c955f8d548057b3934c84c1f97fcd796388eb440b1be4e502
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2122583e35caeb6217dee4e57341d69ed5373bcc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94b9ec7284f6e6877319ead4d1def12cd28dd4e0707215c34c6a6b93b206bcd7
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..207cf3018ec3774c0830bbb2700ad4daea1a7f2c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea54e8869cc0b821e179812bd8168d5b0b153c9237826107246bfe1407abd3aa
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..377e7cc29d6d8960e3597c28f07747f9c1e15069
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac2f1dc7882359224ef555da1bfcffa6f07148966296a3f9fe6bbb51d732460a
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70130ebaf97f9e316285b5034ee4ca78b838ad5f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:303b2ab51c045cf1dd9300db14f7a68b8e0505d085cbf28982a4bc4bd7c5af52
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bde6d0aa6aa353f09bc4e60cb853c8571cb843b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b27b519f3b732fff4df2a7f7eb5d8912629897dddb984f9c16fb6fc758b6e6f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f8342c7959ac38aea1dac7530eee3fdddbf24cd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:737107e626fb398191d26fd76ed5d62b70c112d0ccd74d3b73aff8d648a37e29
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.26.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..93a952a944b8f555bd2212abfaeb5fd3414f09f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2e22a58bd61057396cbecf71217f1aa057d6f9a8b08515581a7472c11409793
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.27.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a57d8780b1812b7386809b246aecec3ed3dba61f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c26b8991fccefa6b3364592e5144338aae4267f42a45254d940f78049d488fd
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.27.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.27.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb3f7399697aaa26d767e1026b2050bfd62f8966
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b40c276b45f42e0c6072b89d90f30eeeb765e3b0c9f872842cf2cf970fcf8b7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.27.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.27.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a143974f9f0ae067a3829bfcc50575866589ffef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52f6b56ece541f141b82a010cd7d75e741b882b550c67a06cf6d29a69137ca3e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e08f0eec91078bfc856a86d2874f0b57cd1de6b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e8e44f06179302962ced3a935333a2590fea506645c105dc49390ba468917a4
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..60de7786949f2b9c2254ec040cde157e5f24f081
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:085b8245c76d35743824b9f865b11a47a39b870240a3014bfebb7c1bb09a3228
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..336b60841aa684424890cccac0d6eb09f6e39e7d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c0143594de9d670319289f7bb489a382ea5bcbddd6d5bb36057e5824d2765e2
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46e0a11765dbe80eda74e555b10c795b0d0a82ae
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f02cdc2a45cd4eb66a0832b6259bead9fae1f06b328f327718b84656b937a11b
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..68609941182a1890988a1ea1920463a555b4d1ae
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e16466c4ccf867cd7f4db9e56395fe9f7eb79258bc1abfbdf6a318ab5861014
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..135c633976479f01817d759bc85a4dfd2aaf92f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a45d784b2d16ba74892a97b85d1db0d75bfaf4181f42480f975259b70722a03c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a7b2f4cd7af63e10ef0e1e13240c680641d2d09b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63f7edcb64df3202aae82baa132b9b0ecb6aee7567e1d4f1a6248bd7e8882f5a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4b3e80803314352510a4949ab35ec920bbd4f65
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88ed4086e434499ffa027b821c75e10168b00e4f7d35c585a1f03afc8095a909
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a698adcc9007d9621e25389ef932c4980b67ef24
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:464a9c294c5ab003d21c4c6ca921464b84a93c8e77d45b2b34d0fea633c5a6fb
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d707fa62abeedfdfbfce8905bc167f8270e1f2b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75ad9a28dbb278005c95a1018c06eac323174b53b68691710375030de5bac0f7
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bdb2a5769c065c8e6da497c476330a2f0887300
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bab4cf0a3584015db5dbb827db8aaae0497acf5b4b057b86f70834481bc5b1f7
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..23ebf93bdf325b7022ca5e073feeb947568ece4a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:877cab75f547fbf0bac578a6409194a3f2e213ca3c7a8fea1cdbeb905d4b9c3e
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff566c4b0623c47d0da95519f56d12c3518939ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48cbc0daded12c34e14b0869855e307e9025a57a35c43ccd6d3e74d6980a966c
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9065cc54b08517072e96315f46299f2759d5124f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e653a442dd003e5acdfee8c92989e2b222cd934532d8898101b80cfa89b1ddcf
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..76ca7dd35eef8d4281feedcae042ea6ad4d134bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56393e64562f04c097785a99fa058b1c41ddc0157503c3f0af7311a2e75fcfc9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4df90efb06cc81ee7484ed247761fdb444d33885
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:730352bc98d1d3f0a2c7740103e78a798c600281d55c86993c55933ec6607fd4
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25d4ecc7b64332c6a454c387c9151f57ddbac9eb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6cd60368c5144a5764632db5ef0359e0cda255de52e9db476630637949c85db2
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5daf61bd535d2da33f747ccc9e806920e98b7e42
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dca027aebc5bda551814e148d622a1ef1ee6eb6a0643f5561c8268feea6381d2
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1031d537079e416086107563d912a1939cb02351
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2b46faf1da2d897f5d86b348281e0eff59bcf958f435ff3a5f5a393613e6e4e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8cf7e8e1bc7547ba65584761a63db00a3bb403f3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7d228c51317106d9eae31d65661b92a026ac84a951fdf2ea6c031fdafb3e732
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..03f0b16fcbdcd65aabd4e81b7173994a46f10039
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94f61b5800ce4b86e18ba95bb9c9bf602e0f5aa3972f4b9ead673e2c079a8af4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0945150272b139d2aa59bf5647528426a559f08c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73d75bc262e7e9c21a84359def488f0c8640f98bbe894db178ad4f530fbcd5e7
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed3ab345568c06f8afcf118c182a0ae17bc74655
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad83b502d53851644259e920907b875f39f8743ac095527e480346868db9506a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..702eb3bc406bf7218eec083d1dc53980826e0846
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2e9560ace5b4465af6d1d59592ec0d5dc38dd9eac1d55febac459b23c66102f
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..af0a4a388ec5d3fb12248ba474f80fc21489815e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2af6e5a032ea9b82d2aefd766bfe48d6ea93b88bc28c9ea5fea07150ba9cc6e
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..525f16e32a26f62c57c629a73ae085f1cf7342f8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a2615a385b3e48b2d5a6893e8161f4ea9b2ab21bc3dd8d128acb29d44d56ee5
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..770d99cdc38005981b09390595c1520446c707c6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2a85929318c27a14278433d9effe3e2f4dfb14d20668f23dc4ae7c4c8054d08
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7dd4f7a51e2d93de0493eb1a4c3c3f57d0e64114
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50c20799a6845402b2d260675add3fb62a2dc9f9e9bd92a8ab26c6767e107e31
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58f757dd5658ad661a3ece5af0e26bcecf6c8a7f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95bf1a288bdc5fcaaa52f981ed1e580131e1390947ff2e51311f0a007c50de9b
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..48800d6cb1bcc33be29cc99da8935f66197ce8e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d15203ed6fc963625a328fac93d94edee154d280fb2840a0aea2843512b70d2b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e56707a878851cdf1968062a5b3dbac6c47f9431
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65c286bca69fa465fd9e09318bbd4e96d135dc2e5660a26cff5041a73170a6c4
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3a96baba9110cb7c06e9bb23bba4b7ae5597e98a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7feb3f17c64cf427c76d89802dca9964f55fdb13c4e1633c27f6979fe9503ad
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6332d8f768d125672205b7a68f61ca51eec29028
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20d3e3b0fb24e010279a7cc6956822071b0511ef2ab8403f1bd9cce40d8f9ddc
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..209b6c7fc7c4e8d26b511f9cf60ecba7bbb55925
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbe5c4c76861f35a52bbd9fe73e0d34e6235e4172c78f24b780032dcfd2b948f
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f967db4e877720a4b55c748d09ef8511ddeaf315
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae6a2c2134e399d7ac921785f178114a1229fcc9cdc1d4944a8a3eb4fc3f619c
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b75963f7d16fc9316328cf9d16f95d4cdda8d7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eebe677ad45ead34f794bf1dc2a5650cf284d06995eca5aeeca7e8c24cf7ed76
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0fc1d42923eff6d17f8208ab6013bc59b251fd8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a9b4efe3cb7eabbb4e0fc76727846d9a94a175b8ed11c57d6bedf89e204c603
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf1cd31fb5873614b777aee05a94fb63f020060b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6c7614943b4f93ee0e7215206258d52db075d3ca4ff08ca6c311c618456620e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5db785834de7b28bd97aaf6050a8c46eca979a7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69c3c3d30f3dbef2fa0ba9181e517d9d07cfaaaea45aea5f6621125554beade9
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..599cdbf8129f3993ceb16da696b49a9d61221386
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1f26344fd4a886b5ef19f295a6f5d91bd1a9671283119eee8a56c3b172bc55f
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..353e93fd4d4bcf1de77c0bcf921be633bf64f1fe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97dacec4e7a5a44b77448d07aa6cbecf827d6b8a203c751e2a95c0db56f7c74b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3e8833d9cf45b4ea364a329ab627c72d168fcb5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c771cdc18e4a9d524aa2be06a54ec6577ebd51ec0b6028afb434c7296b97c358
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f23d096adf2cd59aed1cb0f4318b906d720e3b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c9410b102fbba6f7cf442dd706cbddea0ebf045a7c7bb13866aad06c28b948d
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c61a52a97a50c390cc39552d77401ede9d9586af
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ec67fa67fb9b93393cf27093de3f63c37dba890179205ff0e69bf85effb3ae7
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c271249354504eb05edcb9eb072c2de91ed733de
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1aa1cd49c38552e9b19a6cb934048ccca15e56b45a3444c58d8d9a3406cd9ea1
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dfd1cbf6576c88b5fb4ac0e7698689a7137d123a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f930e361a39d452ab0e51b99e1b3357c881196a6191d7ad7d5767c20156167e5
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f924a025da17ad03af74053df28ce84612c89cf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2731ce55c9ef0f4a9eb4481b8c0901d0ccd5c7c313b719a18c02b8235eaa8be
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec4247601a849400158fcfc86fe5092fe61f6d0b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:924b4d4266dd82cf2187fa23fd13774eb6c0cd44dd530d49113c7c41c39bcdb9
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eae669f68d2175988f366d4265accbfa6560cc13
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d0a36a934dd979685bacca353dd0491132aba6d5c2c816ed20839b1d73c3d8f
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e641a29e31fd2ab35bc2dc4354c957e782f35c30
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ac8cc0f62d8f2688ac126acd0a000240a99fd02fb5b9b1756030209a04e2da6
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59010c1528d2c0a691923d580ee7dbc087df2ce8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0510afcc3ae548b2fad15a04da21d74006e31f3290555fa9a53ce4eb342f76f9
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db309fb1071fc79e17e05ded4e10def1f3aed2e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa6d73b43bb29afb07c3b5a621bfeb7d84a483530158fc6c27fff845de024912
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6cdd7abfad7eb37644f43af92db1b8ccf0661cb2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8947ff481411a303471b148dc753d852cf8b51f8fd904e6a9d1697b517faa88
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d384fd27dafa32dec349edbbdc68248e912554ba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fd01945ea9311b333815415cc3633ea9f76aab2b3297676f99ef4ec2777dc99
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20a7c0cb5b2133aea3b8bfdd1eb6cbfd8c1c5127
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09dc0dd58d5d59709e7f87ec190211a166279240aa873432c3941a560e97aca7
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8638e8aec6160504b93664a4548a4c7249f0afda
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40ca4ff720f2383cb927bdbcf424d1113a7c7dabcdbaa46cc0143390bf24cdca
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.27.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..127c18a15e1d4205ea94b8ce55fb6d4129c2e1e8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c29d4713adb77825908e51e2944a75e14a2c914d249e1a8842c15f811d8e411b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.28.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a00cfda3fac8cb41aeef7295037d6836f5584058
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d954dce7201026a6cb27cab064a6b674e7e8433f98d188592782d392469032e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.28.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.28.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..408161927ed9f29eec46233dc1063524e95ea687
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bffe5b1e1086bc0e4c523b589c393cd2700b9dc03be35e7a7bee04d4b810599
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.28.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.28.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6e01a3e4f84fb33163fe43476c583e950baad70
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa7084b46e2fd1af5725471dc7209ae04833ed2662c97d93212480cc7ffbbe91
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b4a414cb9994e4c8fee83d94c35b90d2fa2b968
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64f467783f691e1fd648851897b46e261ec16ebac1baba30288b8a0010e60a77
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc2a62e008c0e4ce8c4ad52b219d3fe743ec9fbe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed869d211a6ae1c889b32f03441b47b3d5c85db550eb73bb8445975ad5e61866
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..03e4658a9745aa4e482146d87adab838cd67c1b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:852b6733af85e708f92625ea89774e2c0e9bacfde2cfacafb8d7b2e3b6e7a720
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58bfc7564e9709e95c6f8c2d1e399d49b05674b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51ecdbe691800fcab9ef894afee69fbd7438cc1dc8f64d61d29b42acd4aef683
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1081e65aa41fb86eca1a4e8b78ca215dc3aa475e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01adb8341cdbefbdb4c2fc81ac3800951e8695a1783d3888c7fcbc0205c49e4a
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fbde140318a6750e16b380aa8df69dd543a36e04
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9762229fa1694b0d85a2e796bcc3fe896e274ea3c9f967f87c6089cf7b0cb7e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe6a4f098c610aef37621c077b4d2974774f454b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e1182a3f313d7ca08ffb586583473745d482e95e42583876fe08ef1793e8199
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb53ea9754fec94fa30c1cab74537fd919a417c9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b275bd4ef0ef1428c829c37da654687c5409ecdfb9cd5118bb2781d7085be877
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b096cb3cb0f89387fb8057fef6d24c4b9cec4477
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a337f1adb54b6dd8cdf0d5a7b3f373dfedc7b949eed55b8d1356e4f5870ec9f7
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..be337a3592a1cb0f186475bff9e0473452606f4e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35ffd68c116917699c0bd2ce558cfdd2ab8b17c195c0af618ce65c209154cd85
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fbaab4eff61678202cd545050fa98ba0737975dd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6648b4aa86d85ee47f4c2737a38b6952907b6b7aa8eda96a8e0d3e7b27bf6c9c
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d9db28bcb72799e56cfe412d876bc2310570fc5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:513f624821e4340270e08fd9565530ff2f274ebaa15c760f71b951183da33bba
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22a9ad22bdc247ce693e26c121774f370e1bed9e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2d231711ef8fe4397e12455b42faa3cc883d2588f2be7433e9e05fcf66ec253
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee69e93675f9438f0f49783356874170f6692d4f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e018af831f28fd8f761cb5e6c1f46fd99c91504adc83e1820a7ee0fb6a8a741
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d2cb380cc9af6c3033ca05a621f57e74cc5b4d0d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5143d450f80d134e599ab7ac9166596e792d851018ae7708f73af5d6cb535f50
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aaa0db59bc7005f4153822709a872dfee7471d47
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b4fb066884f8da1edcb63f8f86612d87a8a084db8c2edf3e4fcbe5a7b6399cd
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..55f48ec75e3280f559934809594efa88465cbbc1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91569f67aa6dc20a996121ae2696c388d230ae8b5366c4eb0d7c2493e0f8a310
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa79f811681027f861f4c926674f369e6e41511e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a94396dbee36018d82bf9839cff81e991fc7b44b9cf10b08053a46622e49ff8
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b04a74d0ce1ae8df1b749be8591655e1f1dffd44
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcfa0063adc050aaddfe495b1cfcc1df300bcb36353c8bed89d0929c1c041dc2
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8a8286fc8c0ae3be3fc802c112e181bb5b0ca2fe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ec2f6af94e22e5fd9ce1e84c73bd3d5f5be67a96c3d981a5e001a87560321ba
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd5ea3bda500356b2810f8c7273b9500d81c7292
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2420659c8ae0848bb1d968c0f7d6966459b212f36f9dd3f077fde2768f8246c7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8204d1e3f11e72084335d6a3481d714a7a046db6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4855ff8e9be8711aa7899e9ad1ea0e0927484406f18a5e5922d051424b2c43dc
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..69bd8b960e615477b4a1737bcc256f8aef83f34c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dbe160645f82d5d2272e5e84df22154db8ab4a72c45c9297bb9d3f5045219c4
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8ada50e94b831e11e08b73f66f9dd3f1945a00f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a0316984330db771ab17c674a49cb3414c1a322aa3da05fa0bdf2b34a71642b
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..83906c80348b37bff39b220e1c63dcc0461b6d0f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:830e32e05ec21cab5fc2e2e77ec52f395fae9f9a9e04263d7aa4b8e894ca0ec5
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c3c38f72b2af8fea87568bc1224baf2997aa0a0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b311429a0e3778d7af089ef9992671c1258775eac97e1eec34b3d50b0025ecc6
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0869e0d3f2f790b68d7f8624e5a5b2c1c878f140
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34d58488b02d6f2679a80548c8b052a3c198c6315a366c70a901dff728ab693d
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d11f116ee219fc7cf3394fd24fd5f9ea5eaf4f84
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f439c3e92c764cbc4ca4fc1008460ee550bfaf3b90ba73f34b4d8a4cb3a2b89e
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7885fa98d083b07b7c4a36826c4c4b05b69949c6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20d9918a64e401e22242f7b9145cf9c6beed837e13804b47ac18fffe86182952
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb6e140e4e02ddae93e5677231c71a560f37c53a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bfb50be55c729a3a268d36dc86da4e11bbcdaa61586a0a416324f45c6f4ad14
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b442387d55a432f30c250ed1f58eac1ded5d772
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91de16e960e6fdb7c7c75b0d33976b17499332e543232d91d3410f7bf92a038c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d1364c560b51b62c6d8a99ff53969307e2953a6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:776745cd4aebb08e0d2071769d293a2dd89d81e6f85735ee9d441f88671dc339
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59b853de9f2510497b19782c0b294031683efac1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5ca140724d9db3b04cec367d8c68b4ba839e5a6fa1e731de82fde3d8bfbe0b4
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9345ffe17a8d95269c734c7698131333547887cd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8478ccb9fd806f08ce98921724f554461110b2c687b413153927b9d14474b7c0
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db03e53ea082cbb5628e3d8fdd520c4b6a7508ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15e12e0a7e5319ddb04dc711b6a42c2dbdbf01a1d0a7aa3c05d7a32e98743872
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c09f04dd46c79c5e9ffc4bbe5c2c5f788f090247
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77fe019efc7826b955f849b2a1bfff6d7670ec9b61aac4dd4086c4760329f7c3
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0c08d73a0c579d673e279b404e2133afe905cd46
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c60123d4662c9dac53fd40dec19282d980a9587f480f7089c4cab018e9348070
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0239c9f058bf7b46a7671fd3d954da209c3dcbc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e8487e25172931303b294e6eb1cf21fb981738980cf43fc22f285224c7c4f86
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09042a73a1446518111ae669e76e3edccaf6fc26
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7fbe36cf9cc6e905a72b4d006b1b7dc08b5dbbf4bee62d3e2acba8f402360e2
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9899b0e7dddc3706fbd6a43cbe42670ccb40a89d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce44b3842675dab4b6c95c60971d7ab41db2ceefd58f73c48b025ef7d5975185
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..318115a62069d489414af835fd7ae5e20dfffa45
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:934fe45e00b1789ee0c4187d615e29e9bb6dc0494e689d911150a7e9ee581a39
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f3fcb13c19fc656caaaba85db6e1874c38f69309
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3e238ab4ab093dcff765a87d1e3438267bac8ae6e20a325dd8ca31c1281ceff
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5bab7e5297b6ff35b62f98823116234b7385fcb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eabc142e406c71ad0ec8acae09f43e33715fdbc1693341a438f0d5d743189f8a
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ceb27e87c68db7db9b6e69cc00430c8e221b496
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b64af4ad1e73e407385e34fc6293d41b838600808f5219406c80c3968d84959
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a537cfcb565edbb6181700dca94d9af3fcc77235
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb4ec87cfbcd12b01a65a59e0034fb787537167727a3b231e961b1803f8e2e9f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..41475d1b14ba0e507a4f7712f7597c3521b2d65e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45596e19a15882820966274450e0c72e3bff6b8469bcbb9c7cb290dce6722768
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..918582092f72bd011adccb64ad899b6a581c5836
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f8746303c26c6e7abac63fc604359071cf2d1fe6e4769426b03a385adf48be3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a27576aa79645ab80c65d54938439f50c994a247
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8e7df1a029b84486be30980835d37d1eb7d8dfcea2986930db95d9f3cf92c3a
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..049d0119a0780d74654511ed943533335a76fd54
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a59ff2688dd36e7dfa982673efbf2aa5379730de63937e92b28e25aba8294f44
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a8bab49e59426c8c38860f20b0b6d14d93fd471
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84310a64a443e07eff15c9e03c04c9be35dfc38d551ded1b111d32f3004c98d1
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5e00b6430bc21dd77946ed74697dcd4be452180c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53f494123204d73c369ee6cec3f6cb48e46c392fb3eda423973704e13d500b78
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..022f608b0ddad3ef6ba578e0f44fb6ef9c059835
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d834fec575fd7eaadd4ddc7e9a103384953410a8b5e2ab8d4443f8b72da11bae
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..582637c623bf85e16b8ab822a61a4fbeadcc231e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b57f0698b505d14b03b4f3a13959c125cae6dfba969191a7e003f422a7d6e02
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..06f93aad1dab9f98533aefbdd2a1f25ad12f9a1f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2f26e5ecfcbe586b63a76d27c9010579e5537596fd8fd9fd95b2358b68cf719
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..49ff955bf0cea403106bee6736c5540f216f0e10
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d164e6c515b0344dd64fac86cbf8b95cb8bf61fb83452a5698ff581167074e6a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff222d4b76a71c08b372c2e19d21f6224406691c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f7461c7ee0b0d511402b750723d312c108345990b1c3604eb46657a36241e08
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.28.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c3278986dc31d8d54d4aa6e8448dea35e64fa05
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ce10f133adcda20439d0ff8c73a8a8368e076a66e178c61b45c57a8a04aebd6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.29.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2fe47f34382e6b64f86f1c25f73a24693ddc8041
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:442cd5fadd19f074a33c857cfc299e6e722b7d630ee5c06215f22bd3ff8ed1aa
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.29.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.29.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..042d7cee887281b1714431dbb5457744d07305d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bb7b61da45a8fbe174f9bab1acec987cb5c6d0e9e52433044fb0e6ae4cb94cf
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.29.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.29.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8414de9a7df40b04ebadaf6075589b477e7a3462
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d61a189cfef655a82263c66b6b92fa06319accfcadf5e920ea0d11950df43f5c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b24b4e608f53ec45a17cdd8e5f9d5b4b6bcacc64
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb718dd9dbeb6cd3d5f4c2aa2af6ecea4353e05ec22a92f5b5b6b94faac28e3b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..527dafa928e07d2531d6c917dacfda447e27b521
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33e153a4ccb1035a2b2942a774232911c97f9bba4335e6dfbdc3e5d0446c2e77
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8b312b9a95f4337d2d3158587d0dd6b43b29ff8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75a5dc8be4fd4957e7a4f020a7e57c59731a774f04a26c0897bb8b0c036d0853
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d15e885f3a3e0c57bf3ecaf4aa58557cb783802
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07fec86bc6ef9961efce6cfbbd590b4f1f74ebe769e372ddcd9741259e342e7e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b781b1072e9f4672eabfd17a13a164da34c61fb5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b90962c67b77e35555fdf30f737ca917719fd11daaa8a65bae524ea399d9caeb
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dae378253bd33e2da6f3d38d56f30866bd9e6a8f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a9f4a79f438746e5a63093c3d15becade18eff1353d25132e4a45c2269ef0fe
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f802969fcc23fa33cbf972252c0b4553623bd3aa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f67c45a48b6ca3625470f9fa6dd2cf51f033c95bcee443e29821637822b6878f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b862eb6b35fb7d89861386ec78e591904fb2dc74
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:231a17d0f5b5b8de1887c81171f444b9bbd53f2fdc0695c04dfd426d1b28913b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a493597dfde181b6b94e4b7582aab68db607e9da
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da5adf2dac108f6f92e03c7e9a6022df1b8fdf458bcf468a31d7f48e7045f3dd
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e65ca307724b5f6e52883ae1a697bf7585022193
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f35bb8e9fb4c0f4fd994a9fcb885ba7363083170fc2c93f76bdfd9145ddffcd1
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2bb2280728d335c234f3f50ee2f51419ba9b7f44
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9a978d768dc3abe7bcfe1b417a2944cceb0379bc665da5bc0dd2e72b9d35afc
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4856793f5a9dc33f8061023aaf205c55c2180973
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85ff8951d5fcc45743476d2088151386a7b429a3d5e13bb515f4a0eb5f603429
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..69cd451485a8c063d99cdb5db417751964188f31
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:451a2f5f8560672a46b1e31ab9fb6b62c9fba0347e6f08fe1fec6f7f35362ed3
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9fc739572c957f905f913e83500650c8a9593e61
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5747f128a70b9d1a1a0ebf563411dc26fbde05c688b6ffdc81d3e60382fe1e4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..24ae27637f6ccf3a88da8bbc00e33c75e5a4c190
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7722fb951782d2a99e15801a4d3b7be195eff70bc1cbe1b5f5fd829fcb01bb9b
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e6e1c02fbcbbadef8ef771d8ba3e5f24387f542
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bef8d324f1f744ca36caeafa3108bd1a28a9f89e024c5d47ed8130bb9f66522
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..30a391ac2a1d2b05de19b457a15af7e12ada0ba6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45826f48f6d002efdfde76df68b6efafabd1622f7aad14bfada3d441028a0ed1
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..918d07f7b984af6a402b95f033fcffc9ad67ec71
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6cd5b91a982383a0bf6c1aebdd4dc7a72c96a3368aac8102c1fb020fa25c1a0
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a0acc76fb1ac69a542599df634c3f13e90d01299
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69fa55f519e5a8daa0964381babab60afba31d715ce3ca969172820526b732c7
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b41b2b6ec97d87cf3bf9d2a95ef3fdd24340c0e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c2f3ffd6a361cee0b11d838b6403e99741ac1c5ee811fe81c975f5ee454c609
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f51ebd6cbb584697ddbe7cd6ebd85866c76adc05
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14518486107ad1b415433214880f3b8db5d3c7ddd1cc48d539690612dd1913d7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5e89078a4b8cff05df1a36ae724a919ae31b57d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c31449a907008326df4149f46a9536b2dc7bfc02e7626e916e675085a1d0ac8c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..361b828382bcfac13786b88b6ad60933e44b854f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:063e06aa617a93e9f33f2b3183d8a6efb5beb96b26d7a433959174c58054d2c7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..498f3201cfa19e8b3775418fca0be72c0f1af216
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffe4badff4bc58dd12922e33a5ad40bec3057b50a7d677739f3931eb2fa8a9d1
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..525850ed48b7768061ff8f7f02d472e6a9e65248
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46bac448b652e4ca988b7b106f8a9aa3ae7df38fa4af530afe3df27695877901
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37c16f4e6d3557b1c18ff3d912691598f7b6b140
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2744d4e558a4f016fac60a2a306e629834be8e71d2b9e9853a330fd8315a6b49
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0fd469cd9136078d0de5dac7a359b0104d7144e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf0c52360cad0c97dd18f2e0ad61e380db24a1c1840dbc6baeb8497998c73c7d
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..81c282252e545d8a45d8950fc59cbfd80821bac8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f32ae92347a93fd1edd4d68f22ffbd2e2efc10c3a84d649bf17d053a07a9215c
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..96e38b6ed271e9a3ccc479e304bb772388992ffa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4417a48b9434051031312d7290aa0fa7084f315c29fb0b9c012ddcebc3871403
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..adcdfe90bc48244fe16e00a5acab9a94332eb1fc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09705068b02fd5b1b0a36140880a439b20f57a3a955a19f9cad9011532a1fc75
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4aa3425ceb9b89b29e68000dd726338bf5e4c7c9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8344a6c3d1235ba7416591ffe49e4352ad202b4a1f827543e54f6365ae05ad49
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..236e33a76413091cf0e40126397f06324d61d838
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00ef0d7fd8518f446d2c6fbbb2f7cc4eed0b19824c881244d850abc624b88acb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..42a6665daa42e6fe549157771821d0a7317564e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1775e6bb964c3cfba7e16f9491024f067e2fff3c0f885e85253db8423fe6ff35
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c8b4aed37c3bf6c3ddf33892a501d6ff6c20e2e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5b2578749e9c6b7d6b5d388b7a4353a17789ebc85e059dc664d76a041c81196
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..62df24b243d28da96d70e7ddb72acf8401e20dc0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15f6ce7aa32a79a6af492862bcd73690249f716591a01fa784e4eba355a0b84b
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46805c2325e9eb053f05132dcb44b4651a70206f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:445c39065a0b4412fe204ba44a5d011f301570cc79c25774bc4716cfede874b7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54480c3f3cb17d0cc5317724afb3a1001d16d8ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:212c4243e694bc84c1c752932ad0f0e815bfa87a0eec5142acd6e58be5b8cb5b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..245f8dab7e6b4b25855540cfb4d30be57d714a8b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ae23fc0a5208d1caffd4bcd42e3b9fa4ae3e5b04c6aacf87be5fe302c50c720
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..60e7e22cd68dca0209d66e27ebf55fde6dfa79db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fefdabc232865e6108b864fce07f9a88d299dd3b2daf03b62aec2a40c90a662b
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6d56ae3de2278f63fb2b80d5d29da3a4555e377
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ced53c9925321cd3df088ea45413dc67570b8b53834d927d6bb09166e4ca1aeb
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22f7f0375d6ef1994ec31f61b39bb606517d16e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43039b81700b90aef1f60052fe1b6e2055c75a6606513269b19ffc1d1d961836
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..85065a6f7ace17442c86b7eb6f27336c6ebaf1ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:714ea86b015de30ab4c52e6016e8a2549f1efa6e61c62f7eb2700783ad3a5b2e
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a435189adc1de50cc32fb8aab9bb1971e0eb3ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:259e31dc182b07e7c723abf8bbccc29610e17c439eaba91c83f6469492babe7e
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f84227038a819990934f75d4a06997123e01851b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4d1f2ff07705b411711d5a0fe44f0ac39a94d585c04d64f78324933dafcdb51
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..89b264b58257c6feefbb423ed82f1a94987d1ab7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6fc1ca7f5a237afb7ed639ddc7dfde3413632a6773d1ccba85be848e8f17996
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a70852fe4df360a3e1b5792b28a84e3a70fd3232
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8af64700b5fed1c90e0b1e084f7ff659d2d8a6fc2a3b398e10b7a6d77ab421f2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..433b61b13bc88c7dc4d87a99de6c0689efb3220c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a95167f452425dcc9a3fed9e0f3039fc0993762793e08d7f244f35b0c190b148
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6688b1e495b2d51619e74a02bcda5fedf6c6c243
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9856671ca0c4abd594cd070f4f1791c411fd1742bbaddd19669817142716c77b
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..144955bc32ba209bdcabcdfc2af3e22033d6bd9a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8be313c8e60dc2ff0ddc0fa2de611513d15c975aa08be4f88c6fb376a0f43d3
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..697d4c3b599926d1ac38318d2005596d4a6b9c84
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e9338188fbdd86691d2c0f18677214fa2748d5b8b6f4ffbbe375e2ecfa2f51c
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5440f8bb3b7579271b0b0bf6b1f94bfacc66a06
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36c2f7694fdfb49c4c9264c4c54eec9a364353cfef519bd0df519d22c7a1cd71
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9ab82f97679b30c41652b955782fc3d0cd6a461
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:700be85c4af4fc5219583490f9fd1478004c3e0c47f31d1eaa52bb2b17392782
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b09c66e9a2e5fda75d49f9d1bb1204200abab444
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:088c544fb84013d6ef69807cc88703f22f029e20d390c20b5b48eaeebd85eb78
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..be271e5b449b0c4a0e9ab0a1d6cdfaaa58e272b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc28b7779dbef81d31db3cd0f38ed21176f2ea97aa4f499c19d993bccead6b0a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..11965922bef8ed617aa7a69fd7ceaf93a9fe3153
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53b733b59b889e55118d54a11ae28341a99ddaed1b95d2c2426ac23c0b189e38
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7aa524da8abd0c8dc40c577c1ff831bad253c076
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86f62de0f0c9ae5c0d7d0351615d02e2e2634472b88d5acc3fea4995414f8dfb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.29.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6534812ff23f9b871a5c2038c6ccba9a129493bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e75a33ad24f457fd9720786ed2cae91883c7108df4a8854a558518f14feb1fae
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.3.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b6cb10f97c770a8a5370461b3de23c862ec978cd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc5db1e0dca3c7e86100ef4a0766cc50af19f8281d53d5c5b08afbe86faa6de8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.3.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.3.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf2a62fc9851fb1c0a9dcc5c08dd9e82e6b21b05
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43535ee3632f8a2820ffcf6d621d360619bb3cc9e30e3208683c1b42f560a1ba
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.3.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.3.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..596276c1d227233823ebd6fbb8bc5467d030e858
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:606dfb0fdd0134bf6ff7e219531ef738abe0ae17e07ce756b5f4a295afaf9512
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d08869ee963297582234bf18afb9ebfd6cf06b76
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd72e83f393604213e99efa804834839835137327c4dc8105f3aa2f3f92db0ca
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7480e1bcb3abe6a77710ec5fe87f50221cd35c7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:898b5e337ce32f466513c783bd221e12b228d8c7eae67d528e976debcc3b6251
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1004be4ad509dba95aac99b009a5ff8875eb42f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c632424ecf41636f99999ca7e4073e73930903229c88d60e13763f4346ff4c3
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..86ae5cf31be99729b7bd25c129bd500f0f7fee49
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29b4034d8509c76bb3716d5f92c7e2c4d954de5f4b1e33d052eb7954defa1245
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..68be6c87cf99f8183b54e1f2bf97c425cd64d035
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bb43ce398ff6ffb0b7a495fc537879e676ee294cb61e6765f0ae0ba7df1c37b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..739369dc8c142ac6e9bda41c741f0b084593c979
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e28e82dbb6fb9256f5908b884f7e3a7e19e2d1446726f8a758e159867f01e82
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a0e2be6efbdcdb1a034531aa2c1df129f06f10b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9db8017ad84d1ccfb75e625c91f013be1a23548f78236cc2ca7a2eff87287a3a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd7ae729bfd0461993ec00d4459e2988a51e9609
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2114e5c8d80ce7b7d59275604821ed3dc62be6487aae5bacba05d3fe8c6abb45
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e5aec317e5513d0a9ad8010c4a20428f045638e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09da968a63e795e4a4bfb81d56264e8b3f03cbeb02b9c4e048965cc8a945416b
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb6e6c60df9e9e747fa2d2ef9abe1f0cc69ce25c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbcf655d935bdf4aa343308ad10aedf388dfc0a424167da6b4252f8089b325d1
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c8fbe5c05bd5526b2cb7d74a2cd2adf6e572e5b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5709b1bd0b27c1290e8febe6e77f006c70f098b6f777ed6e817b8bf1f7020e88
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd8e372a6754811da87dbf1361dfe9227e3d178d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d2f6f0a169f41babace06790383c654e123842ccbed9fc64878e6d23f20a18b
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8fee9187dff937e891fe3c8663a871674fbbf66e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32227b7bcb31c32fa6be36bc5945c677659d0487e81e74339f46dc317b7bd4ea
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fbf48b5b62d379f67e19a259d18b384119070feb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05a5a44ca62a8c902869f2cb813b4baf86b76255703f6b141a85dbceb1947875
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..66a92cfe2c7c023362231256b0df8a0226cf0c56
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f025038a6f0ed06e2255ef3518b5591b2c18412272a2994d8d8f563604f73b42
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99ac0ccd522181b80597eed2635bb17567eb98df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1bccf1a4dc11202a6339913e3750d96e4e49ca1541826b3ae6dbb6aa5aa16344
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e69f2ec003a4239ecb4f893decf8ca1658c94e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a4bedb41d63df4e2c8816d6a03dad03480469f2a12c65a47e58d589be1990d7
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a4488586c0b4f1fa224a1cc3b1bafb37ba6698e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98af97684d6d72db2e0b5c9912620392debb2a3b80fe5420a43658298615b7ce
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..268b132c7e705b635e5ccb090d9da69eee8288d2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f03d6c530defb37a4915ae9c655f7566ad7368a0b3d1dab05e2ea8d7906a315
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d291ee556e7e8d8b5d177f97547cbfff5c2b04a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcafdf662878e60127db15d51640af9ea3a592eb84967134b1dff95b1ff6c21d
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..538ba3b543222905235989f70de36bca4e7381d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e809c604abeb78d589915a3a0a764c3e73bf8db163315d6b1934a8096d6ca515
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d6ce19a302076f70260f4e4f6b5d679060ef659
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0ebde2082e1b956deab222674dca2b701d9a94269bfd51c228966aebb0c6cd2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1e027d3d8dc6a64b0fde9617ae671142915864d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:024343d64c1f0cf9735175889e7642b5aa3c00d3522d75d1d2d8b26dee2ae7c6
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d407791874fd25ca8e436a0965951ebc4831d5b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c941623547c01654e1b778023a19725e2dad38c2b4845f915c8334f729125f93
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f392cd0ffb8997b4f68b76b786498f4fa7149e77
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18f0d3bdf271e45947ccec8033e1718cd80e09ede27ad5069dab27d457d33dce
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f6e08626fac5d69c9dbba2eac45764c0bbd04cfa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea4c8959a64985e0a64498481ffeac3aec8e7de3c179b75eefbefe5b776176d7
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ca6acd69b992023f57179bda09bbd3bca13b6a65
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bd8413fd0d285781c23c2efbf8c247d82247ab7f0dbadb310257e13d09b13b3
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d67baf17d903b5402a716e417b35a3cf9ed214b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed98e880b41ecf683f3e1ab295dfc19a398ec8d874e5d16ad08d3c03a80c73bc
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9ccd3f89e6ed32aafa437dc09b7c2a39c351049a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dfc02da8eb556590fa2ea3d04d655dd6c3f78d24d845a02790bda5449af364d
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e34d2a530b6e1b5b55f67b643baa28db11e05966
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4258f0731bd90646a0280e8aa1793f0e21c5033371bdb92309b7d2ce9001de3
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a4a5ee35c170db9ac1f7d6e7ae045bfd39792202
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:855313ed562d0ea3f15f38d80fb94bf18d490100bf4d01fd30178abcaf0a7896
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..76695c062dfd23820bf3b3fafcf77656c5a2f1fc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:588a79e0692da960ee3ea74a3ead568fdf09a10fac3563f804afa76ae10c9049
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..631abfc6c7b29bf0bebd36a381aa99305adcdf67
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8d7f8c14adce830f0eeaee5cf440447842cb21bd3c2915ff0800b3b172ae436
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4c620b0dfdb351762018efb98cd18a785653056
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98bfd742e798e66b3fc9d1f6ad838a86aa8e29cd65d96cb4e520d99e4ad6187d
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ecc85c370d3d0e2188ad5094ca4bb6231ced0d44
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13ae424a37bc71a19b53601fce26212c5dc6c4ab9925f08b3c04218456e275a4
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..208b9e752e362737c5c99b87f608e0dc64653f6e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f89b35992d12c38ba6ef51ad646d9620bc21769a5708ae974c9c392cd72b851
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9952752508a2ba022dc345c868f07319bb35f44d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5cd484a93a02056288ee8c2140af0886912e038f4e959b2207e7527ec5ceeb2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25ec1d91cec7b2105d4a9ddb2e2c4aa7a3b88597
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d412a84790831d7c5d3741878435132ee761c4bff4bbe15436b92aa4805d935a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..306c2c546e0b95c4b1091d757f65e41c1a224615
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a400d6e45b6103329c12775f1c05c09f99a05e59fabf0b87c6404a2c4cc0665
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1e1576dbe76463b0d746cb0d825e2b6c436dbe79
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:884e1ad8fbec068876a6df6f254db91b4c3b66a7e12e00d40a566345238b525f
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d2ce9546f1646a99ae9bc871db22a6a527e203b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c41c7a4f09e2ab034bb3aabf5b63bf3deed399cc6de7c44e2613e6733d794390
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8a65798bc9e7243af818e6e9a6793fb84655c07
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c73f49f3b0871a707ae000271bb73f596a9453dff94a5a93371cfe99e8ae851
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..31dcf34a7d97d5591a7c64322179a9ed35902761
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15b227bae5b9c5ea9cec1a1297b0f011c5bcb76d64159b7f79b4cf787f406324
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a493d900b870334786bda3bef55b651f9d53a5c4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8926d32403522277e1ae22405e6889dd5f050119b5f43c4d6b87ea49d7b0129
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..744e48d90dc79101a49e3de5a83cb843dd6db55b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66289706f1be870037d9d0a55e26df1272c3b524861c932203a145511e506cd8
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2720d4530d3751519c59b022c24cc19f47e1a1db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7aa6a8e30d0882e2d38daa8f1b00e698359241387e80cbe9379e0b32face60b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..06d58f29261874690c01c4163fd8615ad9383801
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd2c2ad603462bdeccdebe2e1b49fcf2c6557b402a47213e662736256cb6a725
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..df32fd0214481d95004432ca1eda297920992313
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb8a822335686fca646c4903cc1495728d021481314a8d4793e146e958a01ff4
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6bd7f7069b9ad5bc7b90a1e2f0c058fccece9e8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c6c1a5592dfd96a531087037b8be4936c3d9759524acc41e4964623402a5497
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45ab9ad5af39f2ef20ac654d9f653ad4fee91a23
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0eff0f84f849c538c1bee5d9754db52f3b0a59b79ae197f2caa40a7a2e644e2f
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a01f5d31b3530603e6e33cdb13b77115903cbd3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87d1ecf93a93dcda1812885c09dbe19303285a5db733e63b91ae50f1e45c6105
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54269b6dcb6b2e96f4163edc4ed53a7404c76470
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa2f7e6b28f8fdbefaea800c75ad7dbec38bc478796a4de4e62f55412d5c89f3
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..61d8b5a79b85806c4adc7f0a2a8e703b13fc6c15
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88595bc506bafee99053cf3490cc46238635c021c3d9608b80a746f02f7c9663
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70355097b35d4816b47878f0bd31ecdff1b1f24c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61a4d8f11a93e710401d12577ce4a9e09e8961ff620de8ce99b71c589dfad6b0
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dba6f4f0165504816f4f26bed24bba1a81c5dc76
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d3ecb6cd998a378f8c994b5fde334fd1087000ab175f2409d8aab1ea46eb865
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ecd91d294faa2d9d53050c10e7df6efe0befc07
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:272252bf1631e0d58447b760be323ebcf856bc708a93c7b125876fef78b5285a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.3.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..40b068f053ea9d4c712a53a31b5d14f5c2d20104
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a8e3bed2eef36c2f962ef78cbab107a2c61be6e8acff4b3fbeb29a91e20ed18
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.30.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..754118231a32233572c372bd67eeca225f4e86c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fa465e491ea6b05f9dc464e0b1f6526af40d63a75345aab08aa9697e5e486ae
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.30.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.30.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fcd4856225ef3b24a0936c8f091fbd54822df71b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:592bbe584413ee3f74d20f975c52c83935907cee7104b73b955c0013c6c1185f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.30.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.30.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c481768cd0ab153c29b3ba1ebf28522fb6edd2fb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5798f5c4764c53bf13e070e8e8de4c07864102feaff4fbcabeedb54a4b9d6aed
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e3adcfa6e5f3bdca5b61d1ee5ac116c2b4c2452a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c33a762cac364cfdcefc0d7f7b3b078c014dbf1202e7632fbd937e6eea438c3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8412aeaa85492114e70bd2787719222532765af2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a6a93d3fb93e29a7f261bbd92042138f8cd20f3739743abadc8dffca4c5f83a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..71fe2dab840225dde9ce97800479031c9f290a69
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2991f124d60c479d539c97094890694f61f0da585294b744e568e4bad839e131
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b8ccca32a0408e834f10587db099c1b54243f88
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37f8e2b96b6ff360f0aef501351dd6eb3e4af0b42ff264f32829cb1458713a44
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d11d6d09b317385c75e8e96657e3b5cd0fe0542d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4888e8f1683553347c73534aa4b2b7d86966f1a6d34242c03ce73b6f27cad38
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5184d0926620f58d04ce19da268040c56494327
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afc2f2d4e87f15e1b7335212518f3bf584be04550aade2f39bdfbec06a0cc5da
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aae44c92b2ba449f238ba90226ea15ce18a77494
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5f4b9af783788d0f9f8d883f3f8d463f64546bc854fc3d4d835868fe9514d03
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b610b758f60e9512a0c13798229c34a37ed3289c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d831cfc4557db40393058011bf9d3bd3da5b43586aafaefc687088a5575752eb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..10c59ebd5c34a3a111f7025f61823764eae938b5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19b12168cedf9d917a1170b6f25bec55c9978b42cfebb12234af4c4005b3b8d8
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b30f4d567cc5efdd7caa4209463fa44d90a43969
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45a9720193adc09d3c4ba8321adce279539c218c613e380dd3753b20cd048a54
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a74f379f8414b1507f04183a11c3464792a078f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8233b19c606cf6f254a6f7baf0ccd828c8f4d0509d5106dac9768062b09eecc7
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..42a849cd687324b41fea4e30798c7e991196ed2b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48f6e309920d91cf1a7678cf5d725a162589c8ad2ffc85388d0cf4cf30d99443
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f486592c6b115465682ed3730cbbf528382d952
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26ca8693737c41a88836f82b25b46e430cd80779a48db7e22f654212111f1473
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..548d1f17cdfe82a0a92fa662b17e354d4ea6057d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af2c3131a3d8f1da4c30be7aeaf2ae599f13b4ba9cee20e4514ac8548c6ee828
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e8b86b017b09eee1099a2be8addc7e30fd522a65
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be8ab16f02f0613498022d1084dc94ecc631609f129d1529277cb6cf95dffc65
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f78e5c47839c6a2e900b13a81bf6835b0ef6ac5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58323a404a309350d5ba981f4cd06d5e07147f89faca03e21999fa45e66368e0
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f73aec3492be3ceea7421239cf381e06044a1b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4b9f04c5c192f72a7162b55673a6b25f426742f57ddb3313f00e9562947480e
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..275c3db518a98171cd4d46ee76bbdd37cc092a46
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8f1480fcecf6bbf7c98c9e24d7cea524efa46444fabc034f68a454816112a54
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d013d86c48c618ae537cd31a1253eedf967cccf6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84efd7eaa9e5a1c406cb4376eae6eca69c5d07f22395102fa57c632008f691ed
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d12327993d56f735d6014e5cf6f2e09b1308884
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cd31282b7f693c25bf559829da93873c3e4938b64e4fc9b062298303d3299f0
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9be41c2b169c755810a893e1bee8beb3e8ba3df2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bef181d13958d3e5cd8b1fb021a7473ba6776dce59b21dee07cbddb1edbc734c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0fc57868d1196084b47aee23fef600f33b0443f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3cd28bf32faa999d6f5c72626dbc7f201d91af26a1dfeeb0f1f77736e88f68f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8c22043888feb50c43d572d73cfe39747fdc2b8f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:473df4eba6c2e2945367af2377f18b4bc06ae192c3dc39a9679fa438b59535f7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd9b3eeee69fd23fe0ba7224e7dd4e81a92ac40f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b19b272cbbb7e3f543bafcb4d552d5e51aa8017bbaf47e499bb590ce8b377faf
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..53eb5cb52040527bad800b17b82e6d8fc0acdd25
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a139955a0ef61fe29123888d19b19578735b64bcc3e61955b21d8329187f03a
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a66b7b2a6199d99811f7f20cbc492b70095b471
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bdfaef32a80557115136f8c6158b74e49eeeada622570960948a98c7726a27a
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..062cf2ccf0a03c1716ebfd72dc837468b8b7fc6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39a5f53b5a61e21332b9041c411773db581be0586dd1e4feed09885aa247329e
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7bafa6e5dcada184cb7e14bf92841c00adcff67c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a36f2ddcc4955a88009e5aa5065c74e98d3a58a7afd0b693ecf729263ac5fd8
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb76b66d41050c46eb3c9c30ec49318c95092d05
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75cf464a74a67268ccc36221b83726d828ea970c678aa3bc19c6ff062fc6a872
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5b1e0ea1665430931fe6fe618bfdb79c8a070fd5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b99597f908e26b1ad9f463b5591696b9fc1d094355585caf9c0094a1811fda2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39659aa29ec0b2109f3988914253007ccdc7c813
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec7af30dbc6a8f339ee18cb2a4c665270173d469b0d44744f1bd4248d699b158
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..07fb24a3fd3961ae6621c9bd36ea70b2e1cc1707
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:043ef6eafdf2154da850fd76efe056150e03d8d6b435568800fe870d95807fc5
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f7589340283441d01ad1efb6bed911b9aa5921f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56cf94993e7284a218aa61a8b92449b331621b50a8c5c31ce48fd10d30b0a2bb
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..782d9a2c3c282b9df8affb5d0d91ac5471474699
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28ee1f537ea1b86c41c5f53191cbeeb0c8d1b8406d3b959eb79a06344c6015f4
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..270dc038cd1605e4a85938f43fd909bba52d805e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9dbea5a37a05398ad0ef3d4f051d82f5c63ec41a23b1f8b1291644a36da6d013
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f7fe85b5f2bd06b5cba7a3a38ba47a5db2a9dd3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3b3a8aca51fcc9f57ab7d7dc3a28b7e3ce0b80b90ecba6ef1dbdb0303dab24e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a14d224fdb6a7ed930aa67b7a3f24b0d085c66a7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17f55f5e299d43c69e8ad7287ed72bb61a54253f913574e63ff52acf0058736
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f39959823c7fb23b263b62495ba7f6e4612ecbc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1fdfb5bac58f5a118ea3b200c252601cc9c30e996d772a036e795e73ddc24f7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..813c2bdae44c708431615b85b01d235d4afce48f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5fa6f47a201b2f2b1e1f64b5ad545ec1635f15d757ae63a6dda6046a45b2bf8
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a05a7e0db2e10a5750d125faef31b8ab04026ed1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d2da10a7d2b73d35953eac4decfc535a5d0c7fd2583dbf00fede19859848bc8
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..130c5321cf1398cc3b8924c15cb9afc62ec372f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:787afdf7f91f023c098e6464878c6b673530af7d2854bc88bba0e9139b769704
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5510ef3b1303f95875a558bc3c3db26d5a578ae7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b60d2f289be0f89d1e7a86fc888e5ce2338a4b204f1c906c6169740e93e732d4
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9084574bac669759b129e06eb4d24675d130ec18
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5776e72eb8761d41c31c57dfce37954269f014f0bd4373ea385e0af6a39c83ea
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa52f5aadadc22b7e37cce4ea5f43e23acd8b408
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:143e486b2eb63a186e1e80651e7106c00ee22c0c3a422a95e686892becce7726
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b7aadf33d08c63c806900b1e42eea54a37e6dd9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a317f2c9e557a2ca05440e013b405eb3a088db1fb37cf39dbff897eeebc7bf61
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25a9551e106134e04297b698c6f7362bac125015
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab268dfa5a202cbd339f3844e5afc427b3068069cfddc8d3d59a791b88a62653
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59b286af62dfbf3641368b4b5fbfe4890f3a0dc1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dafa55957d5aed6fe7457a4004ba1c30307447cad5c69039f6f512cd575df37
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..857798a82f0db908e5530a54d8862f76c86cb994
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff485c2e59fdab791224d44cf43548f6b2d2102cb5eebd83e6cf4fe12941dec4
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3338741c8b5e5f16b4eaa345d29863a379d76422
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4034b4e71a3f7bd0d0f7cdac7b9b946c78314cb82c98c178acd477c6a0fbdcd0
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7bdc71bb99bc6dfaa592416827a13bfdb05adba7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34624608bbb48349db3ab346ad57f036bc377b24d359cd6e7b5fc6af43c7a3ba
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c16867bf74b35ead5127fa2726d2e6f6a2bdab9d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a8a45ef84eda103982ee8c25fd9f74bd5cce561d05d4884416ae09f7fbadb3a
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dec0dac578515522296404ce1207470f3fd3e4ac
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5156d05a7e93532653749845e3155c5b6fb89e39bed4b7557cad6e14b049a55b
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d11b2fa913516b7d822b5abe9c74cdaf257d403
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7963a81e6b899a30beda01787963fdfce63f79b60af2affd3d4d12e6e39ea41
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c48bd04e66dc52438ac5b3a96e0b8e1d5cb7a0ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72224e0e40ebf3dba5b7e9c7021cdce5e9e996290934c2babe49a1eaac4d7ca6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d23170e3ae40c80bb1691bc4c6b6a37b77a9864
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bea8e6a27aeac6d021b51f040458fe114dfff5a87c21ea83e6d6174bd738d996
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..98bf0b55cdb54c3a09ce839e82d83a830b3b0e38
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f6ecdcf49fda361d6d7a1abcb47915d82a480fa725fd9466a15b7b56d2cf791
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.30.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6330f082e706c8bc909eaf3478da8bec76f5105f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4e3fa929eaec4cd29a91acaa5a88b1b9e3825a2d84ca93fb51847da352562e2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.31.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..303e56a6ca9402210af1501f49db32ba2e5fdad3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:675e8680b79e61912d5cdf8a941886a155ab87b0aaf4e9d37232e39d27eac2aa
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.31.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.31.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db6d072e871fbbef6b7688f66d640c84b3f057d1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1d1ed1b263d437601c97e111239b5642b664412e8989adb5f1ed5c96558005f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.31.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.31.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39348227e2deff490e348ecf4eee6d6f826a7375
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1d22f977f55a2941e58f5a18f12c5747c6de88c07ca9c9fdcdd7e3508325943
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb380bb9193a59b8cb312b56040231e63f9440d7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:937c40a928ea2fc61bae3be36dfe04319dbf5d545ce64aad0592449202278d0a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..821ba43f7985c15e904b0fd9deb7de40405afd09
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93fbc5e50dd9362a31a17dd7c4d9fedd0530d7f67a3e566af48c93138c5955a8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5b3c64db6f845994dafc307325708248410c49d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0530baf44a0b99e484c6e6f7301458bac367a1908c28614e8935699979aa72d7
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8c70d08d77faa106602df2d25d13d3530265d0f8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64101be83b04f6c2372225e8cf3d0388ef2f2e2e49173e68bf0e63f900081ad5
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c011dce7859c56995b70acb2e7c48f238b70251d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03aec3be68a31dca7918786d3de5d9daf3649489b1bf7b81df09beffe9c9273e
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..582dd236c16661395cc89728debe0e926873df92
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b122343e0b705a53f66862a5a0f365fbf2239313103b07f3e83992612d73e040
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b87b283f59334b37683a0f7db0c86ef870062312
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fbe15a3b1bd9c86f602110b83dbe924c340a15451d141cfd2ef1e0e64c67e47
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c41c1fd44ff83d1f5e1a33b20ab9309e47c5512
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16d5097b99a5ec529f3fa660c81eae0d1020b19f9d07d536bdd60d60225f44bd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..da55b0c0d786fedc1b285674b3a8664484fa37b5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e77842f1267b5406b70bd6c8ca2d5a7b3286d9cb9b2d8a198095896b4ba541dc
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc8ab70621c89552c50fb01b9c56e4c702243c34
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb6697622d6a1d99e82a6e850e2aeeaa284bce82671fdaf5c904bd4285197748
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b397b2cf31b719dd1633022e59295337665c0172
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6cb5878f798da877212f554d8e28c8b6cc9c390a162e33cdd90b4299454fc95
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2118da386a892c88a459a4035c940c11e0dc5b97
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b65a44a29c339e95200b3204b2d99bf3b38a092bf41461bb51979b12f77f1fc
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0fda17aa77b5f03cfe1fbee0389b4c71f86fbcda
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f14fb28827c6f7132250118ba89b0523c43cdcfcf58d8dc688830286705b04f
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a27ec16c43213517abc6ad88295666bb02881eb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c4fbbbe7147168353891bf6f0f94f075019840df3ffe1def1e02816b8ce3ee2
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ae0efd2f978ec179b4f386b95ef2da00ddf8e1d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:601e76c802da3e00b8a3adfc4882ec9102bec772447a1a7dc4c1c1c6698112eb
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..93077ebe9326f0ec9a5ed8f046b3757b4b763076
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b174c2ed88d653a3d117b826c85e4f1461b176a03d2ceca6309a2d1f3fbf36f
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a53546bbd240e7312d5ec8b7c8387a27241d82d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3593b4be05104608aa6d10d3004798425e089509d557af47a154edcde6e4ea97
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92a1fea48e0565120361ecf06fcbea93c6119d0f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee4bc297c4ee17dcaaddd34601415bbb64235ea022a7f41e06521d1e90f8da82
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6eb051e80151be077716689b7cedcd24eab91e79
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12813a4e1b0962fd1dd741e6d6363df70c52627865d7043ad56bced39c6dff35
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b6ced02a654d1cfbec22f729309c52b8fbfd7fe1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36818996c117013cc4912ba6128b1d8ba7ff412b012552cebb53fe93fe09a815
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..87a7d0af7ce371c223e7066d644833ba3c4b7674
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:071cfcf014130d6cf50cd6466ec166ceeb0db704dd2540f5810a87227dfe9539
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d01c6ad74ef0f142e5a133d0370d1c8c7aaa25d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2abdf044a7fa0383e57408e1630fc93705697f0e1b3902e392e4ca13318db7c9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..782f54b060296e057d729b425718caa33b268611
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92550011870e5249e3e3695237cec5ee2432473bbbc31c3892ab7225d61b0b28
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ca303c092a512e5c965ee00ab6f7cea3b8bdece
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c23f0111078c1c0d7013228ad4b6487d343d83692c33a17dd71032ec57ffc4c6
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d18f0fe92f4c6a8fd9f89d9dbb197e5ceda06999
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31351fdd5b7898cb52a7ef2b8dbcf6b97c810b50d13f775f02d7d956dfaa5755
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d1ccc0b223ba79da3ad4978f2fb8289cac2d3f95
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a116f3ed2d56969bddd7ef43270324fc54487e9565b03d7dd62e0db1b0fb986
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7de7e12437ade92f65e9ae8ae37fc34743b64af5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00571cdb0efe80734f37f1c1e4ec8c604f81bfce07e87e7b21683df47707b467
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73965ae6fb20a76a2a3d080ebc780312e21b2950
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2be897fa5c3c70ea842f2c9ef0b3f7d34a9a57dab1b586f27ee7ec7404914cff
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7af0211ba1ed20619a4a26019089ac552c0f77fe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35cfbbf1229c64e159121a0d64b18a5ed8939157d8b5e551c38a27024ccab0f5
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4b8c0737bed4991091b88a61822787bb7b8a54a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca02255236d1812da8416075c96b3218807fed188633c0f3a76053ff681204e6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aaf996e1bc9c2d9f8d80974f183a258183fe1567
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb7730444591422a9f35c349395084b1a5f9e17a997f07951bcccdec35e51088
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f6f86754f298b7279ec93c6f4231752b419633c4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c6b365a8a4c46e7024985821871ca52b9e01ea6edaeabeb6eb71c72e22270b8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e8d9b62540de161217ab66c5abdf7b2ddb861c00
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:577dfd11b8be848a3ec0095bb8af41899e359b0497bdca33f7bbb95a973e87d6
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..248462093088daac33829f0a2ef406ce03b02b9d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:148cf0b74df63e2e45105683868f70495100b356d92ffb3c07b02cea9b444051
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b860fbe13e6dae5b05f5481ef4b5696ab8ad27e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8955e4a64ab9cba79736dd5f78fea8d25478b115d45db6b49978bb367cdf47fc
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c8a9b8670d6ba5cda59bf4a5a3332d69c3a0b055
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93ca85fe6a3264b8036f7182051318dfa9165b67a2c1ad29e9968a2a7ff2a84a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4104eb3fff1dc15bd91d4f50e891dae8980a7810
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:116c3cfe67041d5522b580210472b4595bc2254bc612c2bb715e4abeeac81aa3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..25fde19ba198d28edc7986db16b0fb576b1ccb7f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35e75bb0a4007a76a1bdc1861341064577076f8e635f1d9d619bcad71ef07569
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e775f17648b120a8a33e69900c898fc8503cdb18
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4426371513014c1d07cee9b59048b609e3ab8b2cf6022fd12c5571500332633f
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f08629b0ef4a68c43cb45cbc4629371239045e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67fa80f45e23fb427369bcd0c56dfb7130027f5bad1753dbd70165bdb2a1018c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d37ccbcbed83b198238a19b24409c011c26c3b88
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22c552e336c630d0bb7b167210889fe36781fd93bbef9211fa5b8494df8df60e
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..121ead2083b5cc4b1fa51620f3afe7a78c321901
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8672666547b30098eeb0a6b1862054e6c588072965c973a5595cadbee3f4839e
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cae4f92855693237c58752d53f83bffdf02a159f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9f05bb7dedbabe33aed70483a222a7a9efd1bff60cfe038d4d7bcdef14aac10
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..32ed74fac50752b98e575114513d4b94eb0097bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5503c0cee995d4347a846c6baf5f571788995f48e5d0f96b4d9801d3f82d5030
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..52b008880c14afd56719986cdbe853d5d7a1bcab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e48d3ff8eab20585795db8ee1a168ebd9f718c860247e25f70b4b7c1049428b4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2146c02503cd2f3ad94f4a94e1581acaac6e49a3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80393de6cf56ef9a46a3a58e2b257197daf7f2a7ee9c164adc1ec5b8c9a34f5e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d1aa586f8ae3090d0b965d20d461775ca9f78a0f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79c2f7fca1117f4dfa169458a49d3ce7894b64e17ebadab2099673a6bf7dc114
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..189e1198ae8912ed37809ccdc355a207ef29d3e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f9731bbe5b9bc909f603c8d6048ba2e934b4c13fc6a64c82123569e19526b31
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b3130a2e3a2fcf3d2b6001aa75b004fb252b2bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69ed10bb2dc484a8f4b75f6fc26a21189f866a7ef94e88ec15ad7b5bd479db50
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2fb3a1b96a7a31d80f149676fc5edfc01a706cd2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11d511b9f8242d86957a91625145c507f2408cf42e21a601a88596b675967f14
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc95e990f5a5a9c7fc49caa535b71f54f9dca61d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2f6480bd22a4357c779e6d13c9ddbcccd2fe1869c241361e297b0f3437be771
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..291afbe495233bea545de44aee03db632aae5aae
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:211906b4106383914d9135238d788a2a639dda5bd66447709066a825e94af965
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6422ec4d9ccbb698cb7c54fb25edeb099f9d4456
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa7437292cc585d7741f75d5705bc7aed89eee3e4ad392f0da8b6ed748bf0e85
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c5daf3b29f682163f31ddd3a14ba77834e580a56
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c5376e8a1efc86e8354455dd3c716ea5b29afb6a559285d680bb5f58cc617dd
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dfbbaa179745d620284d66f2fbf3e3af23429e6c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f42ef215b7a518ad1c13d3994962cc0a7524524d1d958e81b131533fc9d37b8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..546628a42b20e6b24f7acf6f6b4d0b2e456fafb4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5fe702b6e099e15cdfdd538ea86cf7653c5f046df0c9d84b36a3bf0f76fdeba
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.31.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1107ad03c581122f44bae1a061f4d1edb04f589f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a526de77ddaf079d514c9f0903da5f5b8445cb537997a570d1c29c45b5c8445c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.32.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64a99dab5f541e7ffe4981551b4261879a882103
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c70fc4f4edc7dd957d66c1652ca42b8cd19e4fa2dec1116efcf70850fc1ba7cd
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.32.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.32.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f3c69db711e30858c3d2a89d2a6f0fb2c0bac99
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0db89c0d5bc17b25bb5bd1c7b222b5dac72187e0cb6367713cc05a0aa5ac1cb3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.32.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.32.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5833f290f80b35210ad45912cb2aac052583f19
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d13bf82f95f15a75b0472632c6c48179565e572c116921ae6411e2122c6339d7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2478c1ba57b32c661b8c55f2c1dba6abdd693155
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0129b5db8f4729f64ab204052f99674dbc5ddcadded92021db79fb96a5773cd
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5117333b3aa0ce3b23e25c03bf24c4be733a8de
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85291e2a971a0afcb4d394b9a61cdfd016e328cbd5a2f89f07b8b4e84ac06f04
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9639535ec7d150ff4da49df0210fc81adfaaffe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78123262129b57dfd3521b908872286d17280fa0156576d883c194e92ac3fe34
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1dce7e97d8ad293b0468f08a1c7aec69779d2705
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fde8542928d60d537a8d372fe346845674661e4c74b2ea655cb7ed70a90f608
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b93436a2290480913182610c8e875a3d04b4cd7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:792d8fb178998ca61e7a79b97ac1898e101fdbc160332a4f7c2d6b7e5c70f051
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4aa75bd6f05b0e966d865769ac1b80a70ee391f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73ac772911fc73bb4799755bf76a3df6a1c58832cfb3a3bfffc6cd58adbf734e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5b5bfc683912ad10f4dde1bb88e9cee7c37fd012
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30375a5b8bae04475d40eb02a32e7954d1298017a7de6fa034cf29d8b5a130d0
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..497236feebce716981ce8142aeaa3c490dffe6d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:815b3725da700236c4ecfb26ed5128c495838c8cc47fb4e61dee3d5256130bd1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72970a88e11b6ac6ba743e63538d5895dca45fef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e88672d845d0dfe1ec6ef6ea2bca5bba28dfb2dec2728b75607a8b989a643fef
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9600859ca0f13f9ae84a2c186c91593993828962
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99981393e285858687215fd868080ec0f857119a1dcc7a234b258ffad5abdb67
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6bd2606cb9516203185f29ca16498d8602509cfc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b5fb039151a438f4d8b05227ee068b6a0b4ff0e4e67d0b8410f9528a63c952c
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..10bdb6fc13f48078a1993c5fe13055c3e161d382
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4feb316c09d16c7b9ebf2d0831133b1a024cf3e44fdaaa6cdd6cd855085227b6
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39f6fb6d308f88f9d940ea629c19902a17fea3b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:430a2efe961b60cf6edb5d15d77490536c29093f47325caffb5841a5f12ce075
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c72ce31838514425b70e3cc287735fda8afe37bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e17587a97c89a6778c3cbeecdc6ad3fcfe2ee6fbfc9dc1a37bc5bb389374ee36
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..732dac6ca558c03f85ee1ea4aebde47025378ce1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c77a28c7410a8193fca5eda6c3106a2e43dca4006fc63441f914f253cf291e99
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..486492c3214de612c16d5e38b72d05c2a9ba5eda
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d89371bbf92402898642d542bc1cba169cd4349bfb8c2e9993c662969530302
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9429139a64d7a6de2fcc3a7c213cc0de0f34dbc0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23fdeedf899d5d8a5c3d2f0af53c1f6bc1c2b027bdb918db8423b1ce7d6f5232
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a00eae02163a01dc81d47b893a042ee5cd409a5f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e15c491b200c604639f4440f50127e72cb2b4e857a1289f3bc88422511c6d98
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..424e0ca721eb201fe420a2c8ff37a0e13a9c407e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff0c646de6e1f8897f548a408f5bf507d2386e924f91c4d618fa21468f71a9ea
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2ded3696783d5f1dd69d7836c3edd95a4b3b3438
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c26b4601e48321e96488cc2563e7129ee8c50a652ba4d6561e75a63f3a4dfc19
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9dfa519921826b6e7634064713e3ac5cf55c0940
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66ec29507c47ddffb16c2039d6dd95df16a89d081e83325ec7534e3470dbeb3f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe7d96b5bcc204f52e1c2f7ae530b37dcdfbde9e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:681607781221ce4407236258854bcda9b468dd2d342aa67ec10b92cf450be38d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37891d1d0f1e3950f579fb73c9daab3da578db21
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69885489dc614f2676173e9f02d99a2b19dd66929bc5834aa4dcc50a05dc9e94
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15618098a5c846286c4adce52a599729eed65728
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10f5a6fc116f3d3c020485aed620c2a99006aae9dac6177fe4cd275e291d2ebe
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e7eac661d556c2117d9439731187356cdc68903
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0ec5a3d2a6066752a3690af6c77f650c310bfd04f0c93a96bb0d271177c2f07
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..752fb32cf86ee598bf96cc7fbe4894ae6a955571
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54cfb626ee8e2bdb8d3149d9f91a70ed99f83b7793f4b290a4d21f2d4075220a
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5e63ce37ee4fafba26072738e1ff70de203e427
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98796ebe5b7e9f7f915111ab3f23cb6e47bc231df1daac573e890072e684b4d0
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b3ee224fc01045fd68aae48fa83961907abefa8d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77defdb2f23db1d36d6be6e8c92e975d50f5d231bbe4f57d4241b6770f479f03
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..48ad3ff0d65567870568fbf242d654772952aedd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53acf60559bfc64b218e072874d2ab4b4211ef445dedb5ec66b04b783bc3307d
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f19b58cd9ff022a378b884d9153b575ac1f5183
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b54166bc74bfa005ce5ee0a52da7b99ecf17922ea7940a4da7145ea487db97f5
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..604981bc63b7aa9e4f123674729a2c3c628cec27
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f62bfba3630accac633971c97045209231dce69e41a90c1fc8d2c20fd8524406
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..26108d55b2c9651c8185e01d57a5b1a9d9f3ebcb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba601b8b38ab47f9dc48ad7c8345a699547d8020c0dab5bad0e4ebbf1f50a473
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ebc2821ca462a8a24adacd2968e30bd2bc11269
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbc25846306428e9a51f2754d67024764810cd59df1af11dd13843fa2aa86be5
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..634b69e19f2db88c89f10f398a56ed80751703fb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42602f954e922a30214f08b1aaa392a19b3ef5fba76aa1d6caeebf0f9c7da561
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..80e8b588806819b4b34d0195a1f8015b7e16bd47
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbfb951ac88d544c68c805b7b8e5f80aedd3c26e21c34f66fb8cbf003abc2897
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15b8b8cfc020a295c5baff3994d58b5f69acdfed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d958c0b157e1d9522e060fc0e751e9edaf6f35bc623e4827b00e66bfc4dc8d8a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bce7c1770bcad0cd0bc3bdc87374d7ba3c5787d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e74ae7301da3c9a57275197650093f8bbc520ea632ce68c541d3be753556356
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e3aef83fef176a2c00af42a67a9837e07d04658
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36f4a77123481f802d08ed12f61ecbea6d6c769f8a337791c5f39a271dcb2b4c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e4bc94fbb25e1ad89e8f40f75510be5880302f7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5792b6a73272e5cb5a19c3f65261e3f3b037361dbe3a356ce84c8aa7619369f7
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4873e3e5aa0cbbb93a072437e92cda61d9a6327f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ee6d75dc0409bdd946e792d05a791e24a153d00c324eddc230e8528f33e9128
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7fcb373ff460c587d45fa0b1f961bcb65c69972d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51fb77ed51578ac08300955862b263a29648f940d740349de72f68c99bdf1e29
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd883894be8122be3c4ffe01c8596a08df907798
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29ebae6abaea2f72af14ebd72060eb54a8a4b52b41aa1a4d3144d57f8221438c
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..516d190e8e7058985daf922fb5266c577784ebe8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d14d8d8424f2333737fb1c8c4c572ca67a2bfcaf5f637d4a32e88b890f7755b
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..701f2d60a2f8e8cd70afd30b2e9f98308297c27c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca200d12118b6eec216d4ed668c04d30be9f39e57982475642e8ada6f5a3b00f
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf88ff5a06ae03c1ec4fa713c8e1709ae7b1cf6a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac29114481a49edc2353c8a1bd2265f70d9d0bee98efb38f465b86ad19abb139
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f49ceb41c349b52eda357fba12f2158970177a9b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e2c03880af7d492a8b51f42ee1bd22a3b4b0c20effea235f1ce7ad56e963cc4
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1cae7c2fa10da8f1558f8042d6511e377fb3639
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c34ed4d02440cb9d0b70589f58afbee5aeb25596eeab626cfca73aa211be8f0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b77d1da94463c731834a7ec64b26622e24a2f16d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20db8b5b06ec5d2383d9ff82d6af1bb91a8f5a13bab2cf35ed9c321900398c19
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b6c6299094af60f73584d738220bb338be70786
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:98451c11a234edac307742a07c4fedf8df48b00dc5901aefc98d58cedc424060
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f98c20dde7c2fa40cc47effe4137a8b962d6a92
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4410199a3eaee4a38ce10d473b101dc3b1391282a379e63eee8c02f43bb0483
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9a288ea716a426af7629a7c84eafec36b8df0f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95390f8e931243b17f8b0eaca670e89320b1ef4e5147c6f51e123381d0a0a09b
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b421b75bda52b43deedec0af21dc2469c2f9bbba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02f5e6b4a9a3b98ef3b476b8a212d682c141e0fdd3cd74a6fdab1b101234eb7f
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b12f47162951b73d5d32329ed9013b6298419136
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fcb4fc715b2b04a7515b660952e134e61d45a51ffe5d95c0c3ea6161226595b
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a175bf8dcc3919b2a9bb2c0c83e05966b15743e0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be0b3dac29debf81984d0fc33f16d2bb6448253a8515dfd783c694d329b0925f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a33e21fec853f98b4cd409f3647a039a288b29f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06c699e745bafe5a7734a1c8f85480d043277e4563d853cac4fea11e762ea06d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..49d653b6c7b1c19f2c0e7a2c69f39522d914f05a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d37a51782ece327ecdbc6524754bc94e9d734c485f1dc4601b0ebae97006458
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.32.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa7d44f35b6b91d49c001fd4664e0913f41aa4fb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4bf35e520ac738b175a90cc4a0bd6627b5d3511eeff891734a93299c2eb18aa
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.33.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..337b70bd102b73e7cd67544f559bda6341f2afb5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:115ff74d81e19bf1a397950b0152f6d30216404bc0c946d2a834bd7da3c54464
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.33.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.33.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..152401f2e5539243c4dbf94f537820959e00b21d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd8261850383fe4a1c13b4bd12e73d38d8b48bc2c77e7382c6ed3123fecfdd42
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.33.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.33.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f05d977f738a229a768952bd2a8b9cd399a0a326
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02e7c4152c9afa3e2e9c53db2486afed8ddefd182bf7919ba32724e549059f44
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ac266ecbe95ef2dfdd26d7f5b1dc24fa93e9589
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b85c4560daa99f22246f5d9ee63a5f6caa57dd7e1077023074f18ba66ac03f1
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b514fae94270b57e87f29e4967bdfd998039e2fd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e82bf0c1b179f4f7846d37bed67c0280abc4e2bb9a811df2d3c0cb47dd87adbc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45b63d8fa5127a23c0b9b64a0d2b1b1188db6af1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d029586d128267f1a0ba0f27f8f6e04e4f2c57b74a560cfbc427e4bc70fc56d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c31060d66a16013a016c86f43abc748c952eb06
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f64e466f47dde9042b35fbcca036694240cdf66265324fb40f312b5ce6956c3b
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc2cf67b11064074afcfc4597a429eb9a64d11d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c18b689c414163c093196d6fc16eab0b299551b3500b3f8971c73081a73ae496
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..426c7c508e401fa91b3bcca26f935e1145ae927b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ef37af821a5667b6338810e157b077142225bf701f46bc47a4dc806014bc94b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5eb2d822dd4930882075d0e3650839b1bbf16e74
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa2e83c637d7ad199324d1b8352a6129ed35d1dc14e148c1b646a3347cf9362d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f8d669c2f5f3f639e74d264d74a26114520f6b6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:823876df56e7e2e828bbb80f00eec731ae058e27d139461670748c83a132bf7d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f2d63c63a47346d057c18765d530be2bc9c6423
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d68e4e7fa42a20c939c2984b9efbb00ac6fd8a3bffedcfb5b41eb544901351c3
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f0d3ac5041d15803385bb48bbd30cf0a2ef4d0e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:142b1adeefb897763e942db5560af4fc18ec787a173780588ca74f9548a76ea1
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..41cef7940fa121831727b3a2a79a0bd2f99eeb60
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a99a981869f9567ac68ee45952c563f49c2168222765ab49cd2af2adafe8738
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0fa6d6476f207ebf39814e51f2151c4a877a56f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:171fe270d877a83c8f0b4fc251300b7b4394191a50e858e9a968642c0990d7e2
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..28e2d4df868053b2b666de612476a50a2bc69758
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8fc261a64fe738e3025fb5e37f074012d7640a8a3d02d7a743d6792f99464aae
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ebc079e508eb8674d2a12fa10fd77f670a0fea66
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a585b1d2410c87493478a52328ef9b5973383c37daf8f17b6029d24e142fe4b5
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c5e6827427f923bc796605d003fad87504774603
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2bf8fac0a28dc5a5fa9c9198bbffc6fe2a61a095550895d19d9e7a9a131043d
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..18cdc0be59c5e19a9444fcec5b1cfd156db896c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c1d77a9373c40b883d268d61e35b99e061b6342b08de56802476086777e4626
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e59aac44de308cd3fcd45780b8ba89edf19096f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05cb1aaf2bc2212658f334020e15d4be752e466e152261f5d5591cbaf538bb5b
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb3f7308162b43214ec29c3fa3458996666698ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca6417aac61f7aa671e266a463364089c6679558fcf9758b98be87a19c7c372c
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f1496bc9c2b1c80a3035e8de585c46dc0d1dad0a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a94e828020df8d59a34aa9257760eb6c76691faf9cec44b281b5dfbc33d7c5c3
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7371d47810922004ae01b83911b6c1515632e993
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2ac9e8fd55f0275006e2a7849d25c82174ec129a3549e4dd7527d0d9f8e5a5e
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..839c83faa49bb4478ab3f32dd0f675c7aaf801b3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e12c2b0f1d555cefa7dbc9e1348faaf5919f517586f92f14d18ddfda64d84b2f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..60f2a8b2157a3b839deee8bac4d1e203ca6a35a7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecba3cbaea4f080ecb41a8af545a979eb1c685be190b2ba41c74deebc263a655
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6e7074777e25c57937b026e37e2351607db4fb6c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0eda3eb3deeb5dac1aec85a3bc76bf27654d9b61bfc3243b690944267ba42a81
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2acbf14ed6e24b4220512e76d7741ea4e820649
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:971d5d4023801d8500283dfa5b6556d2fc562bfc72f20c2668b1dac8c2506ecd
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1bfb020217cb548ed6dafe021485881dded6cc10
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ebabff3cc63f023f456b6ac9a29a97599dde89f6b8643fad62e85e90d3ce5b4
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ffec495088fe54f1544692ee1f4e82e498e21a40
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0050837d6f2a7d26c597e87a421e33aee6513c2e528924e445dcc42033dee6a7
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f3a5b283241b61783979712685c5f9d700a9b4a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7cdaf3776f6a01a1ba0d560c273e8d64f24d0c7a5d958f1aefa96970bb1a682
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22d3f2616f1f4337cd6f0151d52cb40656bd4563
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89c2f6e543bf14b398dd62cb70c5348b7a0c353442800ca8ba89a4f53749a251
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46f51b7db5b75d3bf6eb523ca4db4221bfaad0e6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1fc9bb6f36e98c81939025e7bf11c2e133dd15390966c27111029321a327631
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..06c27c3b88d96774924468ea46eea9f2b9eaa312
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:203153a75e441d080b60c34f48a07fcb41ecb3d579e228c70b043cef8ef1ea56
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0411c9d37422832f824d8b4d082906fedac72914
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5150fa091d6cee1e418d2ca9835ca40c958c2f9cbe9791458dd70c7fcf2e4da
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..916d1f29bdb7d7f5d31063b0cf730c8d4d5aed17
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b72208d68feb1f573987572f545d7817c64f3e64aa6b1fca7dca469e4e12d10d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..647aa4370026e4edd77cd10575da78cd94d904e0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abf281d1134f988ca0a503e0c149f146be778c8abfa5a3d642cbba356aec1cbe
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a77398ceedf49077fd1e250a1bbbdbc66400306a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63ccfe75a06c05860d5a3192a5327b198daee3dc66312d81a39cb7513b4f4c40
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e4a8b3faa2caa0a4d17ddf7b8f89bf8213019778
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ed1ffd769dac988a5bd142e3166f9a75c9613e600b1b31321b93003ea8f1ab4
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ee89784572f4b5da9f7073707e50a95308db3db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5217c3531d3f1b1ab0dbce97a3075a4f157cf4c09055308fb14c52e40b274353
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16578096483b4afb1907d393822b7636607acf59
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38de2981127c67da19987724b912de83534012282b2c2e8ce917ca11cef6618a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fca6bf0a8d74bcda420762b694d2ab967efd4e8c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fc384cda571ad187a75de26ecf54d7e5dcfd74e13eee3dc408281a604e4d8f6
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..029669c101fcc70c5f5c0b805cb89ca91d43a3f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcdeb6c6e316a66ddd712ed7a8563fb076ae5d0aed2eb34160b4238df80a17fa
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..900aa3cd7ebea4342214eed47b6732fcb18f3c07
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dba0e072c45a7110ecddc2dabc1a84369318ea362b0cc76ee196d49270876aa
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c1da0cabb26db8c6f1bfa9edff3a59a2f8bd6fa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f03fb79a3820298991364edc1c69dee35d9f01bcf706324b8a968f6e37b07a58
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16213c226e4f2f89bc8b3b10f324dacd3ba05aba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:358375eabecf125377d2ed0c2e6acec9f342d570e5ec0296581c1d87e8977e2c
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f0a9816b52f19a9b929b5d4693c997d0af2a2d5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e37d915b5918629f69b5153fa096c6fd31fbda4b8ae12b37824a1956646ec31
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b052cd7a6ce13f24f2c693211b8074d02c7b4b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb67bc279c8b235e4c1809f9d7c13a617440f6bfe8105719505b83b2d96c3ef4
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1e1e351d067c861b1bd8bbb42afa5ad8e9e293db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57ac65b463b2570f360427dff206a5501d3f209c693d2d4f622922a58ab54459
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb5c999d9e64b73185256111e0bd6e28f3d97e59
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db250692f954740f4642afbc7d4a7f32687a3d6f470c94cbab7a3390d2e05a2d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7fbf8b6f1d3fc205d81f5e3eba67f713990c9ab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:787347f334ed8fb0e65c20d39344ae0d35aecdbfed23f6a16591e2147b64296c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf5a04662007a2037fb6d8cd30153e465067af9f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dad8fcd3839202f9cacedc247dd0a5ea138898941e3c73fbe0ce2464857d16e
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5bb5b69e74c9548f71842d2e694a3b633120a4b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf927ffe19d7057c71c7809cf7a9890c248929dfd0375b910bee32a2d0ec7a59
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c09616d5a938067f09dc4a879c176eab42788d21
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f9b327c5defbc905a7ad2a8a4e1c694e5c02357f28c575e47b6f233cd79a089
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3fba6eeb34e69e66056d90dbd74104e7dc18d4f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c69deaf72c1f5c43c4d475b3723660f241d4a05759f46eb27f83c7e5479bec0c
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16c78137c93e859d1ef0028a5ea95099ffee8969
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b13bdff06e3a64cf51bdb324c6b917692e904bad69282bc34afe3cccad4818d
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1ff863efab72c63b215ef82f8fc3eaec5ffee456
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5d15ffb635e1585e6f622baed676faea6871a4b73ae5550daf00cb0def5c2f8
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1cd3d605caf54a659b0393cb87d6433f1f459dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9734657e395e30ebee9849f5d5b3c6d28c914a202625aca4d7b8a5a6b32837c2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..df50ab34bfceb47530a5a610620df801b8289e64
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf1a1d914d4751506b159634d8fd30ca4baae9eb5e5d5cfb44fd4a9feec91308
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..62ec49b3300a4a0fb6e68670ff028c820bcb69cb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b954e95c869239b7ac906560bf9a12dea313e9bcd6857adcef7abcb64a3ff076
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.33.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e296419bbf9fc79f270e5166a670c16feb1cfd38
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b093d98d32dfa8744b8c0c4e62480cfba21aa3c69486224c35b4eeb42f98d19
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.34.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cbdf357c2eaf3dfce179fc557fac8afe40710d3a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6ead74a3d4415aba048be2330e92aeae1156f54657edf88a52ec3e11c092bf9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.34.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.34.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e22693b9856aec8eccd46550437d78d0bb273de8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c691fcc448a5329f572129084aaf77a9058ea68b6c0dafebbcbafb69353f060
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.34.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.34.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..736fa3a0d47a0cd80ad936ec311640a5101c57d1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e178c385107a4141c3f6fd68cb85b06526e12d3aff36c7f352a6d477177389f4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92e57d93b7ce66e3d2fb24085789faacf5136523
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44af2fd1b648eab327b1a6e6a6fc65b48a7e19bfdb5282b1822f6d97262ed93b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc716b9436294bbeee04c196e5af38a3d0e6604c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96e702d93aedb0ea5aaa343307dd0401d96adcc825075cf8f18a3bcac214574a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c1f289eb715935f53ae902a8c7c81b891cf4057
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a328fc8a8ce39f8a7652420b117c7225d7593f6b6a61df5f2581f1a85d9a1704
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d17db747d86efb90282f05f93bad0146ec98859
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1c3be6c003ec1a557cd38bc1022bce3858d31c7178c64edddf9dfa1f8f81297
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..05b74d7cd65424af89429b616b6b2eb5c0e20858
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b97844772088048e1d2a94f0679930fb1ad245be730910fafbcb1f162bece6d
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..85ec421d7ae6f102bdcbe22d9bd3f32eac011ff5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f7239b175507f26b4b853ded8a8b8845fb0b598debb9f6d0bc283a7668c0e24
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c58cc6805a0656fb0c3e99de55438c784439a2bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caf91c00f7353e3d852435865dc56e51e156330cda7f3034f7540893b6ffee70
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9444dd0e9fb9f328e04324995d53b6d5efddb0f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14d4b09f35fe0f453be7bbda59991fce3431064e6257d27f75110b62c9cc5522
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f82638e9e5bb1664be46e13754b7794ff4cdb9e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d71e0707b79d83acef39b230fd6f48bff2633a835bd1e1f8f22134d31d6176c
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..155ca54a518f5dfee5238297d99e8717c7574334
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc87719df69b61f81bbe018eca3fbe5e1165c82f51142ea0105d303d58bd6b98
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eaada227b1569bdaea3e0a26a3a608562491a5a7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a0884eea535b85b1dcab297c832c0a8ed151db5340497256cb41f0d8fbd154c
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0fd5055cf8b516e6819f532aeca08621354b6669
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12453f8547ca6fa6a5c69237cc0367ce92bd3b8f654c3a5570abca3e8798ce7a
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0ea383f4da349060a69ee54a767266a88bf64a35
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8d2d2c6bc4571248a8fe7b4f37828d55dc3ef8fb90d8c09370eaea0395e1499
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9db4871ddcdcffdddabe9d8350b7fa99777ad2a3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8089716357d79b1c1d05b66f0b8f1cd0173ddfbb8073e97849bbef2398d9d9f9
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c2271fc424b56323e71762945b68a150cfd0bca3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ee9088b46f6d9ffd8a208ade2adc44162a4739f7f2697414cfc8daafc0792a9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5895788db2cab571d39c0bb5223e2c3a397a8673
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70e079f61c55f4b10b93cbebe0105e4f4aec7e73a97b972da49c640cfc11a1a4
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d6595811792c820ea1b7de4aadaad64104f53dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3d7a948fb53e8d1d7fd772b817b0e9109025d9c8df016e969d3d6e52a3ecdd0
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..10123c81ba3eeeed4cabc9092c185ffd978872e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa7fb6dc14ecd134ebdad3f9a822ffe9f2f4282aa00887605fc64faa7078e40d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3534b9bf8a06b0f32653114cfa943e42cb414a28
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65b6ed19cf68bbbfb15bfdaa379c24eb0f3e285cf757a8999b097fbf33820131
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b8a4182cb9ccd8bcff781729edc24f3e58057bb9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:804a50991d13238a72447da73d6d5116b72e9a69cf520562433a0e3dd2fd58ac
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..41256803103ba8038dade2794611fabd9a7410c5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebef2202e753d68ccc8953f95b4d51f840a128947aed91dd2d39389f6c769ad3
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d363d9249ed872a99ba2e4664a4db063c56a8e2f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86385e1a51a9ccb4aedc5bf754e5b8e85506f0ecd338b52b43dad4b2de965934
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..006463b45f7a74fbf35d8a5fcf21ab2ba252eac6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1757b51fadff0a93b8c7275c941af6177f89a4bc7fa9477785cc126cdd72128e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f28d3550bdbd513e0f0ba908836bcdadb192c2c9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0f7bc996ffb60832c2be2a0bfa0c879da9646fe2bab2f0e866d97b3a844fa92
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f784e32acc8065303d3672a7e083c70434af3287
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e629f0752135fb97319064619192b1217c01e7b87050059fff2901961d63c064
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..828f646b795de4667f1a257b4c917c185eee38f5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7338865e9010709de4c5b922c14e2292f15e806580524a337b35ceee66dbddd
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..89519b7812097514396f64bc26618857b1a70bf6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11edc46c9cb1ac7e2801b1c11f7de1195e69decc3f186809684ac9555bc37215
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..00e1cf01c4219a5f00f49147c0e31135a24443b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fb5a076d938a68e106ace740d2a1f2d1f49831d3f7accb4690a039930892473
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a08687053cc62fe866abd6efc63f1dded1f65727
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b488e4ed3e814c0b1427fb9999964e7587a7f5c5a5a58d7e5098ea1cd9a6d1c2
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ca97e4443ce2b1fc3918e7096abc84dfc228bb2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62ff0c7de60dc061a4dd344afa4dee3561b869390978b460509513e38dd643e6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f6a03048ae994f7859f0bb056736893fcf826966
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bab6cd0ee97f05a81a2bfe20585f0814a4a788cb0f8d116653470ef444f3cd8c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54c186134f982fcf39dfc6386369912b96695474
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18c3d841ea348bd8284dd7ff1466eb9ce49cb5df5699dc86fa3e89a98c5cb230
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff42c5af3217d1331850316a9eb5350d300ac5df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a133348b36408526d524c27d802e19e126bc80c19129e28da3864be104955775
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..71324423da1ca51873d8dae08f18ca81ba7b69fb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e8adfe08e96bcd748f00731f79b84006bf9b8096bd65075d97ca47c3965f4ca
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3695c11b9d1e9a524be7610b123d964e6f904789
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4c42809f67eb7cfc95ccca2dcb447f303f5fc5e33087fe55df552bf76d37b54
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39d130e898634e49bd09563a81378ad5f01339b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbbfe58289e8b3b823eba445703a2dfb808902e986d2daf36ac704b187aa9097
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..69e2ae924bef1483f39a24fb273cceb0a8ec8a73
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ef69d29417ec132269f463634a432148bc9e5ae460bce0f45e89d2ff74bbd2f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b0dcf16028b2d7ead35fc7a9a7ae2ea76523ffe2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:590c519c8d24b3f77b7db248a4ffbdbb7c1ac739bfc90fe150267ec4dc78e197
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..61d295d40ca551ad3a5feb3280007380f66e2e95
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73f0834f61fa614d2db3908b924379a6f4a9ef334781d2d34e95810f12bd0ddd
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0728daeb60484d96ae931873c6f9d332fbbebb01
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:585a9398fdd7747017d0034e810b04f5d0c68407f0065ffaffc052729ca574c8
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0216f43597fc5936c1e1f0cfd1d12ff593fe49ad
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc1f09db3658604178d40160d1384c3212eee4b380dbacfcb86c6d6bb6ba89d5
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3f13083e1a1dd0f02bc4bf5d0735716ad9366af
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd1c2a5a73b945ae58e500821ff8e2b2c74dcc616e1add5e4906017a58edd133
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba843d37f523c8a2ec00e14381b5c1e65b960427
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed554a390dc660cd3170103c8eb4a64b9dab98f698f3564b243293710a5d080b
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1859bbead5d60a190fb1c7bcfe90002a2ba58b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eeed19ae0f39820eb6bd2cf27e06a0ce850bcdc8a310e92da9e759fa443e58f3
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..86e7dd87edf975f8a163f18a6541651daa24faba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb7374c0a185d817328aeb0c5a4742d6f468a606319ef7fe97c852271f2b842f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50b29590b2a750ffe0ba3eeccdb40078469beb3d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:677a25ef4a755efffe8df6e215db783b5ffc52d18455bd1bcd8ef1a4dfbd426d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eee13b1b162156be3a2a55bd8b3f86149f4d4a75
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e122d2b079a7526474dda0581fd4eb09ac5016662853ec76c66d415728f9eb1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3afc22e8dea546921e225b910af3d73dcd039f9f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ead3fc9ebdf23194b2fc5994567a38a5b977bbe609e462b2ebd9ba85a0b934a
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..27e50a3c26869fc50b71fd1370ca6059d556096f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07d8eb5b235d61be960fe1198803e4268b0fd23d37df51667c78288e07c42167
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d51a8abe404f2eb9304d53ec4de7a76615cb408f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f9d77f867b70c26386efadce0ae1060359e1eef68ce0e62197ccda6f5c32326
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..317f675e8557c76a8f8e4947e277e456d5075a53
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2af73cf6291dd772535add2c819ec9141205f3ada195eb7b43063183a148b835
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b58ab338205c85642f27ea22a8fa5de1ca51f4cd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7acce750c9e518ba011f768592581c58139cc3ab6ff1055d62bfabc6755e74f0
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db8b9e4997013c7766df181809d2073d2b5198bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7de1845867c6aa5eac19649737c50660adf7461141bd331d261e3e15db776a28
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1db0ea8a30011cfb2a33004ac790df5a121862bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df499444e2280d0163620f39ef830bbf32591102f0b5200b6acb4799bd89aa62
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e6c04c3b4371b76b973d64809715d9419afb030
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4890702f0ff678ba64c0587a3555f654550a5095ae1411e64f9eab310dd6db11
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..719878d999bf2e872cf71aade38acdd357e32788
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb4207e3e1f51a3330d3090def95b342468ab56ad31f73e7cc35a615535291fd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.34.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..96e351a6e000e55faf00774c4a64a4c5bf406435
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcb054cec2438aefcde1323834d9996f20870bac8c0cbc1d7760b77c3e060136
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.35.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bb12a5d685bbbebf540bfc4aa55ba61e3f6a5a0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a214f6cf64ffd29f2dfd3a9fbc7b79ed93db6688c0c28021d87825678869b572
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.35.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.35.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7aa4843674c3d7937997ce252e057f81cf20239c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdfa13ba70a67298fd83c8810e0a80230c48f1ce94608b4e50a9162160d00086
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.35.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.35.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea55d0b1995f695c51f376c99b49b6d7b47c1a9b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:129427e4ac2ab93f2b57d5442b060b789077836716ad2d96ed07dbbe37a7de2b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..742be14cc28817bc773f91cf9542baf7c5bbad8f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd41166c9e0ea210d6f83d326a3c8d269e1f56c5f29c49c00552e827aaa06b03
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e4f1036a967176bb963b68079b44a404f6dde230
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af75eee8fd1ae43a3d667c9d2ecbb16e37abb2213913139e9b31777beaef9ea3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5bac3df4acbd71c0a623704f354c6140a51225f0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfce0d2a10e746b277f3734a76cbdf7b88d6fdbf3eb29e72dca801c82f402b97
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..005821c1423b64c0c60df50f92de59b796563af5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5f1e33f3c64b0c4ce6e9dea5c4e599c436001622c1f9d552e59fd55c73cc5a6
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f45cf7873591a2576e6d9c8357dcd13c71670a2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:288cb37d9635ec60261d5abd939aa1df481d24c028b4673599fadc74db0f3d6c
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3623d94c0ef57ef1813ec101c75f2055d7df776d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62fcb1764f6f7ab048fa56b2fa521ed1caf7ce2a08f24d794751de61e493cab6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d4d89f1c0c9edfe428830e03cce0a0816ce9cd0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8fa1ad84a12c79d02ce4ddab9e2ef1bcf8f5ba06ad1258c0727d014c1da1dc3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..925f569ebc24ec02e629f26e6235bf62f654ec8e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf3bbbfc4d2ee8671c992c0f87ef6257ead041768101188ae2b55f6943b5714a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..56bec0595b293137466ed3941f63de85ad8528d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21809b2d4a5f34338aaa4bf58d2869e966d58f7300256d2e4967add6a8a958d8
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c5b75b0ad2379b675d5e8d606ff07cb6c0ee8766
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aea70005e48a31361edeb180aab2deebce9b90f622ed2bbd43dcafa33bbe1cdc
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f995dda6b8c69b86ad60fc7c2f2998b78762b63b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23a3b1f08789137546507b09f4649e6c0366d10062575af40739bb83275b7233
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..af0efd8892958e106a9230662cfeef40433f5556
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f200720820db17982318bc58b5a1f4c52184e925d4ab90531dcdfc5229e6b443
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..61b8cf3ce6adcaf8af74a11a0f4a2bacce83ba7c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c01250b9a60254fe15562dd803036f60b444178b8e2bd14424c3b8e18175584
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a7656fa16a3d6dfbfe1949524f86da1d7c06f693
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42452cbbd40c4ad89f9a2b3867e05ebe338c295eb7881003cc6d53c519a4d460
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed455fb729b4323fa2b65b745e70af6c67c89db0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb3b8ad6f63e7b4d77754bcf4dcc0244ac6771202b2e0268283bcb082908c7a4
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..763c95270fe417fb8767456e4e525417bf182fb2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9373866ac83164efa549cc24d299943a2ffaaf335821e6d8a481e45b8a3afa1e
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a62fc9ad867f9a2c445e1fd7433d67f195ede30b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37ed860817ef2601dc555201555f88d133f5495a7d7f7e1716fd238596951035
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e06b5484a0326e5018f6c82825f3a3d222c0f3e7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c281e82890247dfe8ad9bd9bdfcf62d12788acf6da124e31c8a8f446e10f30c
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d1c38baeaf98e73242045cf1c2c2bda571c78fc7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11a7e2b68b1ba6435df697dab74f038b1502729539d0dd79d48bfdad57258aee
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b805799c4438cbcda2d4c6315b85e4c340f0a4c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0efe4ec5d1b5d93cad22cd8bd8d609c644d3514e2ef66f3294a6892f4733cd85
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b35650434a6d5454b7f316e9cabb6642f4517a8c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5639ed5faae22476b27e64a4198e022321de79ba7a025f2c2ac70fc7b5d21d4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9cbc0c62717cba49f9579e13b49aca23bd815d5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:168e6c93fb879f329c2a62073798d33fbb6e952efe5b64d85606d816cae23cc0
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..980db6eedbabc483d8041b8800e3b064d39b77c9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a9ebfb30695f36421169f5b09c5327376de1632f6ffd083e430b880c1c6acdc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ca6e312ea0ca5f4fb3a592b450937a8692eec914
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67283f45246e66de75276f2a2ff250b38dd5f8857c8fd6f083403bf8d1591364
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff0883349e60bda25ed6028726cda597b9062edf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:739a5a8587b7af7c66963c658ae32e50cef39ac1cf71e3185422e065d50b4a22
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9308aadbc3f283e5ed63ec50e191937b270fcee6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c2ec04f79c380a483e4569b448d8d06ae17529f85f96ff73841dfabf7c17594
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..123404d1687d846a7f89d70ffd6fe227a25fc3d8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c638af481cbb34dedcd54c0cc422ac8fef83a834f00858ad170f853a49b2138
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3de754a19cad5c20a7bfff455411a5fd4000bfa1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc2ff238488c55c2fe347aa4e51f3f61262a998e8acd54a7a4ddb4f8a0b1a301
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09f0a7da930363cf75b414c2eb0d61fa0f81c54e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40a0f0eef5f99cc213e589c2d99f1b7a992a88f0b5061ec854be66c6df0982d7
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d362a0a3d1cc4a3e563934af8a0c899f45681d8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97399efad0b799f68c9444d2ffd73b89db246ae1367bf5de7fc844da1c6febb5
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e23a43d0b243cc57f82064de796e769cdcf4cd2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5252e37506249f1b86e606b9ef3460867324f525941b0fef302ada29960f4617
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a6b9ff635c5af78d4962f8136e8979edbf3324ed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4aea11fe93801facbdec55fcd1e128c55e5a06d44262cd98a9da248adb27ddb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fbe6d9dd149f876fcb23d675994a19cce718cb44
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2967c94f636293344e5993ef2ba4971fcaea271bc2996513e89e6e3c50a40f50
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0496b8b0076b166d88a59d8fbe63d16e6852492
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc606f7d480d2680373c272ecce6d3d984a814af8e8bf3b545aa94288a7c0e48
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b0f2ca532e8c6af61f0ad9deda95b68eb482e1c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8847cd2c4c8faf9f943618b310a1874fadb8e8c016e7324d3770bbe08c929e66
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b352713018ef436294a95a699ec75bad075a6504
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e06d0d62d0598c29ad5a4a66daf72500e4bf8f65af29c243d309f6741e96c40
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..230a79f193254e7ea7d40e93580c8b197a31ecbd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:570bbf9bfa5d51c81985c72bc728ada513df9b0949db8f867db3584f57ac2749
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a103a6d8510d10c868f1da61302f74113164419a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb1e4707d693449a601881afa8fbd22e1b244085e8235e1ed591e50cefc9a194
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7bc47fd15a9b136dbc38f213af9a50921e339d33
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07da46e859fccd0f32f5fc721df1c2e3717aa265aa121b46e79e150af0669d48
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..55987edc2e069b0c3a639d3899a63a2922af0225
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2ec6f7273bbbd81d88cd9cea9342bbbcd98a3442555f4172491b0b1e53462f8
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58b8c9b600ee783e97f4e74ff556d7f5a24f9006
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce876e11573e9586df890abb31e47b6437cf2a0f4847f635bfda5638837a82ee
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2097ddf47ee553fa65274bb3ba0787fea342f247
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:439f2f0038022226d989f94c82ebf6d8f342328435704b589aee87c308df39ec
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5848068eae7c90ce22b82df8525437a9fdf6b63b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23ed794717d6aa7c9a51d03696a4ad5be74af5ac8373c646c9fa79a0d497beeb
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..930a1c82ca97ad63515e6c6affeecb9ea7433458
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90bc854ff9226acaa1024417477b7c5a10bf1064d5a1118cce2d250b5958730f
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bbca76b99549cd0a70f81e11365a0c439f6c7c2e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a036502c115e5e39122db35ca22db855c9f31063e17b09f3ed31775668e0c4c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..da45357eaf8034ba3d91581c1434ead762b5965e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7711ec0edd89d83d089689dbacad01e466fc785cda3217a25eb023ffb40fa00e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab50ee6ec00b4b08c67568e7510ec31aad15878f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69fe6228d46bcd9b7d93df2750d509bed065a1f116390407a0282694145c899e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d8ae8d13f481312e8c757108e908f5c99f556adf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da75c4ac939c17f54f39ad7d3ab1eb8503a2e3922450d5fc37b39d642e280901
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..57dda43f8b793dff8fa55345d2b2f39922fb80e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a5f4b9ce81bedfeaac5a91f4fbd51c9c78bbb27cb1ef5b38767a06a49ec7de0
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..480d9e46c653d53e9f3c3d2b7f8981eb49c7780c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:902f3b16b8816d72e7075d3360048e94ca5ad5514b02fd2b992deae30beb8691
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..19c6a5f0b7c8c273e8cd7f6f0e88e29d8e5aeadb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:503332675a9fb6a8bbeb5f9314693f482a5557d3e16ec15b35de98450e64dffd
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ef4aebf05f2bdb31a89766768e17e32b9044b46
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16dbbfd0d738882ba3ceaf3382507e38cf4a7db2058a6c47f8de6a7a4eaeb91b
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f3a868b500097049a6614b3cef19ba5231a06e6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:824036d90f2b4e0416f82e9ae5ee40f2b632aceb32b1152e72f3efc22b66be53
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3a2d2e5632adf6587d85a75bf9d5d603ff60933
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2bb2d05591caae0d9e0ff3174a6d9c3c136e25cf1641d54759658c35667eac7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..910a85249a980c7a758c64a64246e49e81733cda
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a071b527e58ca6bf8095edd9c86d6e7829846605d71e41f9d29137bf5e4c9f7d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..333d5dc9132b58dae99128654b505db7b6a6da40
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc6bf15de07993e2cf24cfd0d1d30fd3dfe5dd1a24956ccb3548b96526eca98
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.35.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa98d8f12159ee8c81ab34d9079afc6a7ebc47c9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adc2bbdf14d89d9783ca24523eeb0f13d5a4a647dca49cc1e51c63e529a116e5
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.36.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7535af791dc1bd4b33d2a8250ddb59b661012a51
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c6c6cca7d20134dd521d1754dac4447b05ebf2311b3358245cca88416ffc805
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.36.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.36.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..904f91251c6941c4fdb7975b89e1539a1f3ee5d6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f61c6b1658d4add6c2d3e186f428c684eba2ee5f080d100f4f26cac47a6c2c0b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.36.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.36.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..634bb88e40534cc061abb2c3341b772e57bcf4c5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57f328de04c21dbca3b7729465c5e7db8732dea7b202956ddcfe4b805a6a84cd
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3de7d50016fad9ca26fe8de6a6645493789d1c9c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c8aaa6c659d199dc4df806408152cb3da613a135728a89bf64f2b6acb7e503c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72ce4061c975bb741a96a8e667e11cc42ff3d040
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c97281321e95043411af8907fe456a0099fd4787781eb41828406fff2d87ba4b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ea87e0c4e6b5f37342ae0be506616e251efa738
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d2bbf5de5765cfbb68e826724a9894827d4dae0aab6c3ad8bce60a1c55e48ce
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec7f3487697e988383f110858f330368f84b5083
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d53dbc684ecc456e872c222fb1c3777d5eed6a2718220afa261aeb4312776e49
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b56680dad9936bfdb5e61c4cf16a297f77385d20
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfaa44ce272e52025276767d51697008efa2a77a9bb8ca0bc25be6064a3d4cbe
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b1494396b5c0bcc420aa4eeecfbb847a79e3f3a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd68824f89ba74ecce2f8523f20fa6afc66e8217cd8e577ae2ccce098b399f46
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c9ef57569cc237416da2cc30266ca42817aa74b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:769c3bad6723cabb86630361b3a091240fb8322102916728abedaa69a0d7986c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c648fc12ab6cc9414404692cdb7f7100a0ecb7fe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc1055444df47f97f41176eda5d873974d674286eaa94ba823a2b0ee2ffbca8c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1bef0382ac31850760a33a0949f9edbaaff4578
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f292eb7c7500abf7e0d8fe39f6ffabab675662a7a29d057c0793c3c2ae3b0aea
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b6ae28c03c746333fbe261b8438af1d668c62c9b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f98399027511d03a87adc8613878979199ec47d4c740ae5f887b95856a0a7c92
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..93107bef45c4937739e566862af97ccb6aa68393
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c388cf0503a4192e72ba7a9241cc7fe5ea80cf4383d0c80e705136145a5f8c1
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b71ecc49034e51d99b730753c00009be3c9098aa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:884ded930e0aa04a7fe8240e941f7f3fefbf759269e98165cbc235e19eb76614
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0b3ba9a23a0ee3675261b81fc2d1140dc87d617
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fa0e28883379ffaf7e4975c76d0366c40e15bc8d20d923ca2bbd4b05a393d57
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..034b97ce730b89f97bba72d2633c586794de6eef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7934d01bce4ae60196e63150f914cc4cbc422b685b5bf55f4e613ae012901598
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7edd38beede93dcfa20b489b3ecd3f254380328c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1207ca89caab449eedbdaab1e98031870090c2a466f48fcd844adc6d916a1914
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72cc6f9bebb9b0f1cb2f546378ec566594ce3d7d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8787ef5790ff31a7d4c0481b2ef7735ab51b9edac532b83610485f3d8e103b46
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eca41c23114d5ec43ce46a3c8cb8eea9de86beae
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6578d6089dd13e084559133a0565d391422e0956c6c7e33ec01fa6329f7397f
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dfb80fe5b9a63351e1f6ef0c7c3377879d2baf08
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5260f4ce938e115a5f965f79f9f97c9e9e1f4a02e409ca5ff0293938de76759
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70ef44865a759913a0580de5caf4f64a0bf9a4a2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd5c135519204e543c577a657ce4cac5e20b77d672fee0e4054694a324c134ea
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4b743e1174be896582efc61e14b04108c46e1467
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5b6f407e95b0c4e6f827c7fb4ac79a086337068341630a6ca7707622b6895d4
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..202d860ebf95166e7a9e78f5f121b27057a800ba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffae8ecc56c2c563890ae73ce5fc9a6c7c0d0bae4be90d214b889ed65a5c9f0f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2304093d365ee9d01d52ea8051ff39828d79c40d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9cf4b639052dbecc97f80c457b0a000dc602b92f117391e12967f0c2b51ace0
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..470e109511e39ffda1eeea8993a804298e832f58
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:027b9564702e9a53fff4e759e0eff58e1e32cd5d016e6bdae9afe04e9ec37287
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9fe627cb513365e713d5e1a12d2459de2734a4a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c333611c8019d30d80c890588d4821f3eaf7d3004986a46d84eaacf9e0e0f7c
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54b28273e0fd2e223bde944893b225ddf1c4fe52
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a4fb0d4e542f1635bbd45bdd53bcfe41da0e286fcf88e4205673912b9a23382
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f7172c7e4578bb2e487243d2eefac61f40305ec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2358e5cf3c369370647d3451526d3292094894656a4e8efff167df7b01c48fe
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58fa2fdef0f0b4c8f4beadca73999b7ba60fd191
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:390012a4b6a99b17047a9e70bb608453e2d24940a0937fc0247807de84287944
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d723782709fb7d2aadd155c93adef601442351ab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96b93d1d4d2e18702f69dd0bcff14ac331992aad06a4b8a76920b3fd0f85bf02
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64f406ecb0d993ed8c4f55ccbfd7744ad6a8a856
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01e2714d7b86569ca31a847238b59f64e040567ad5421cf5c4c454e81121e9c0
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a9c2dd4d0acf23a2a6beabb43ea8b966572dc96
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bfdc33cfae9837096ad840958b24c3d3b8ff445a8190ea56053b96f546a0142
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5e211f570dc5d2c5b2f1e4643ec455143906a1ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb5fc33fe9b839981b34a3fe4b3876e6c0ad80985ed935999f7c8cd4c4585481
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4b47037d8eb4bf963f58b464025d460abd6efa6e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a38e8a64096608aa21ee2cb213c827d7906880d48e738e3ffba4f2c3183436d9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15682296403a5665e41d6932ccfaddad0504bdaf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:334c858dc315ae7b6fe922d4ce76f4796cd7f910edb79c2716af14991d905179
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70697bbc13d6edd0fed00606048b1cfa9bdc2b31
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a785217c35850aac660be56c68730d6e2910370d72e99e9f79a7edebbf75b69
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6fe3bcd84bfe74cb1f3f48a72b96dddaf31d4216
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:094a17c4664b31261ad2088a1739531f1324b439256436378089643218248023
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3a22932473e2febe4118917df17f2b9cdbe53ffb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45c9adce863daaaa780e8306de3eecc089fe038d144d42ff3123791477b8b483
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2402873d63aec58e32479a0bc2a45efa1f71012e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92dd820416104a027f14c8c6b5c39b9d78808fc5d3489d757d16f51c59ce45cf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74461a9a9444c62e204e9fe8a8bf0bd8d02d20b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc8093bf0b41257f505a9f2da16fec0ee7285ca7eaa2c540366c0896e56a9d33
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a99082acb77cd9dda5fbf93631c6c3a667e86a0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88a412529cf1a7eec03d62acf82ad6968ae6fe4c02e26993bf52fd2e154b3134
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a9a83af8d65dc0160d568bc84a9c21ea0b958da4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29fec618c521a10971fd6e11bc4ba49b2ba5a3c5999ee45e8e9996698af40e5f
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..11fda7476d292a8f88cd9d1dd0c1031ce668cd68
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d9a038d081fecf6df60f1b7865deb71442871b0bd1667eeecb3876c3cc5c5e2
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5562b142c42138d4c12732649a5afefe08f5cf23
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e58f2669b00d78a03e9761532645c1c0f637730a26f66573bb80bb9768d4bd74
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5709d49546e28ef5acccab44a3e599637d986a0f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b738e68661013283c4f6b98098f9353e27483509dbe590a170380727d4b01f90
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dbd6ea84f961893ed4fc1b3436687b5041f9f717
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a46916b4a018b66a35c604f592623897a17dbe0e67a6ac5d19a162b8dcf42ba7
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee4c628c61132466f040385246d80d9c5c83f58a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e54198207ba022cce9a048b20c46950420c798d636b520c50f07e101a60e9c2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dea6d41a1e94ff258b2021d542a69b0dda3ad430
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e520b6d8dbee1f931ea1088726b5af2bf12f51686406f1e8270f28dbe5bddbdf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e76794fab77c6bedaa12e76b001b44aeac65a805
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3d07db98fa96e05de0c4fd5c6e8003e410e67334bcead7760f8d68cd3916432
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a44ec1e1be69bac4ab415fca750a36eb88078e39
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6f7cb4ad4a6823804aa3b2b1c64487297f3aa0f62c790f2f845500f34485cdd
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1fd9e9fb8f0ae2f72c47a9ed346626daeabae583
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94af037885c39d3850a7e426e80e4eed807b876ed9e746ef638602eaa8cdc656
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb801de7f8bf7ee706b83d65bfa505c1c79a1e61
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c07a17e4347a0c900c7e41abaf04a0127afb36e8550093fe0162d52a39493581
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c5b80d2062496ff4afd8ca488ae49817f680919c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:733cc9bfb714c1f13c076518feafb559db49b2b1d1fcf4233aa9758453541ebd
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f1d09bcd8f760089c53cd7ca215f7559a20bc99
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0707ab9b0a51d805b458a23f617faf2b9d6177527da022977536b7d65e360cd
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5e329ae00f9ab2b0236669a6bbc4bdc700d2eb7a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0b35325ddeb2afd39e3dae0bc15e732a95b72c7d7d7d1740dd3896370db739e
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5885c2c62a6a2a34cb80c7f26a4cf50d7c6f6478
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10db8d7bc757b1bf4d79ab47b7a35648a0286e299d0dc130ccd4d48543289cd8
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9cc4bcb7c0a638465489e8212e6a4ba817b40872
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5f1da4027894e66779216e34a478b32fb6d66c1c9c2e14d7b3feb83b3099fae
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b660e3f75a37e8215012ebb3c45614689cc15a3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6403ee9120d8d5dc9aefa4d9fc61a2b1544d2202184323472fea9ae7a46d785b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.36.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e210705823b5cb40638a292d77a601b0ea7d8dc2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0c0f01eb42aa1c9ac8626a02753937f751371d95904b00971e4bdab95d5178a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.37.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09972155c1ebe3a9fa9b4073b35ed2aecf9c17be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e5841cc5cc8c73ac23de99ee6098f948354bf83c05527c1cab11c7f59de4d6b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.37.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.37.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f1b98db3a4bf2e395f24c49c435f557c84602f4b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:528546de30ffd8c740c914bca68167c436adda1f76f16ce4c6abdb65dcb439f6
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.37.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.37.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0dd077e2e97a9207c70f49c5a349cc43a9270f3e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05da80f20749579a13de661208d1b7c1608c6d6bc2a540cd73db96ba7e3fae03
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9fdb5a9376e86ea8c70ec0ccd0000ea756a7c4f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e7e2282933d0ed006f0832b3eaab667128a7a39efd3fe8b5bc1bce54323742e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..52bf023c75abd41137057beda9425beae5c8acfc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5112d0d4029f6111b13c56e6ee72c83bb7e4d6e26e972b4943ea1d63e1f252d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b072082671b0c41d7c0159e7a0548dce330f929
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc3c3cbfb5a9b10307dd2ecf58d6e70847b0ec4f52c4e69d0fe521f2b035234a
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..11654909b1c6c8c36cd6288bc455e3d31abc3d57
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12960db2929f71f8a95801af9d21f1476d0e76bde02e5df19e79bd3335fadd24
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1eb7e99297af130ebb6bbd186960d8ebdd5c6248
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fabceb844ffa047f383574b5aaa5707ce1b8bf9f4addc5fd31c89d4e4769776
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46845ceb98e401473125aa13cb68444518cbc7fc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b24dfc8075daf76866747cd68e329c82526ccaeab779b7888385918de8e5b0ff
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ec0e3cd19c6cc5cfaf2f9692dabd868ef1ab04f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25f85fe610154f99ded68ba1903e642da884c75b28990f7540ffa83f5f5aead4
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d490b53161ca2d3f26d160ad8fc383c34483bb40
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e725685e9f6d072befc1dea39bcebfb4254c0c65140ad47830219fef8685785
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..26c11206acd67962d8cd4a1319529cdd8df5da57
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9113e1d3996bb5626b9ee276421e9880781b5dde9462616d1d1576ff2e4274d2
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c6726af1cc8da01a9d99f734877944b61b14a54
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:769a8139b52b68e6e911b594f6f303f13b0ccc626d5cca4e764bacb8914ba3d0
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4262e172b622c8f7679c8f680e177e14b23c2538
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb40031fe9ef1d283e3ada6c83b9ceb10a401e115a18a42341bf50e709cc48d8
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39fd31c8155e4a70677119898019b110da546461
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56cfe082faafb7d488d531b2c351bc21ce51c05e8b2f6446c0632e8fc2a51813
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bd1ed67ec8b413c442bb969edf0fc83050cfa545
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99f97afcab010d59106fbbe9e3994a42283b69c5ff53e0b3fc07e0f3ab4e13ab
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bdf4f87eae6c9861425b259311d94993124b33a4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48155689db808bfb91780aaefb62fdaaf0d5703f9529461b86dff54e3ba1e15e
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3dce41e15bba0e2fcb789085406405daf01f264d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:070180f956b4d31eb23ebaa2e6713b10f655037dc54715e3095c887738719fe6
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7ca6285e25a28cdf92353b2ecd50d7b442843641
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9080986d31b6731ce1fb529719326a4c56778b3cef71abced977d8aa717acad5
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e59f378b74d9adcb6c6a0a4a74c6c3bb3d47055c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9c5373987a7450d487830c903718df81f12a021e3a53cdc81391dd619b20833
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe429d0f1c5d2407353de9ac9c391498d66e810a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70c5b8ac60d31db5e4966d3d1ecf95584badf1649bd4820799e8771077df6a10
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..67779ec40ba80f6265aaab5c0acb982982830aaf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f426190d1e99c1c75b7970ebe188fcbf342aa1b7cc5b5966f137587182b9395d
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2998b56eb9f6322cb250d9160bf2353fa6f345e7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad30c5afd1ba2a49358791e5fd0227ac93913aadb1cc7bd4407d3f97aeeab5ce
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e0b4259411e94294d3a0fa383cef9f4d8eb812be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:002980f044139ba006001a8a9b302a5edad44259578f1e1d16e7ef262d5b0953
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ac11ca185e6755b7870fcc31e0f2ade55b514ea6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f80016ad2c621fe9b36f3980746a60edfe7f347ef7a0d567fb55504c792cbf52
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5b7b19656797364a059672d299bcf9563facfefb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc6db6d0017b7071dc9a172d37623ea2b273017cd8aa402f3b3ca1ce800d1280
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5db5c7a92ac38a4fd5cc3ccb8c2bdd09e42a6a60
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d681a788085a8f013e128989ef39e29bc70cba8d0ffaa942ff9d60aca10dd212
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b7fa834edc450eb0be3baf6697d6af6bf8262554
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65ec2d8c1486b763e13dfd502079916a3066a42afdf84ffaed24562c5e4b59da
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d72e95f860e94bc3d39c52575ec4ff03df7bd132
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f69186dad5e729456f003cf742f92b70e659f1465aaab284b13e798999e04dbd
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d78574f1e7c3a579f557944e20ee3b5f1d25a2b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a09d0e91d1667beca23275097e3889814709980bf603369d01aa36a0a07d485c
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f860915096c0d072b3872da43074baf20327353
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c62d359137aa2e3d7ee4d236eeabe3255beb9fa9eb5eb59fad1ee714d537418
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea30fc8e4909a7f2fa771d2579735f4a705d1587
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7e77a59ee50b798184aed7489a94e497f92d72fa2024b2262210ec3b075b503
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6a76066f4becc3a54e6ac37a8a9ab2d1f4ec3e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8735277aa970644ee9c287488fa0d8a382ba4856bfbd4c0fa85531b752d98e57
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..21eda13df32ef76d519c50acd03a0198515ce200
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e717b4cc234346f896748a1ded5fbc783cbf641ffc7f8e6780fe25992f2e009
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d9190dd22ed5eec42817ee73baffd2de67c3b269
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97f35a0a5fd5b563c8f7efb6c9437af127bba4d641069f9836466a04a99729c5
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5af10453925feec93a34064f34abcd8d1989c48b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13b0d75839402e476e4f1af70dd6e4d4451c1c74733b3c23580d0de9c912d444
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d902e9f0f38accfa19f22802e4df01ac1a26327d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87c46cacd2815e28cb7ab6c66d5788441f913c8f8d062b93c7fbc26aaaaf406e
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3a0d8cacf220b2919a9a39752a04c1ed74724644
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:699ed24cd61fd726a02a50b899743602eef67c782cd92a204022111e6ef7de9e
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5e4e77f331c8420af5c09be7226a72b44ec63c70
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec09c023e87cde4812ece21b89e146d4696bbd8bc5bf289001e01c9c8fef156a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ec0a6094a9cb63f7df91bcbe661a1c1813e31bdd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:289bc7136abc060e40a1b639ef402b4629f370c3bc1b08fa686c7d4c80f79e6a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..88e62a3a09918f87c6c320fc0f7ccaf3b82c1fbb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d85a41724a85dcc240dc6c423ad0f186c8d5778e1d0297b3e617060f113db4c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7fe5bd094eafc63240bcf8e184e6dad2a58724c5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7c12ff9a4c9693f54e140a45e285651ba0591a2b2472f19b0bb472a219aa5ef
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..289c4c998252e424bb28a230b735d0f18c20a088
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d671d74566aec28fc05fcde6b63938637010ca44495f6fa3f8c4184dab040f2
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d392717ab59bb2e9a9d09b8e33bc0887bc7ef276
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5feb29a8d99ae9d6b4fb2d90706508d256f789c8f4e8faf7b9bea74c36d44ed
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2afe97278fa93944f2df44c55a805fd42e88116c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41764cdbe5f6a29eebc24fa8b55157c7cfcc9dae0f16d6702e0871789a75fad8
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5b29b9541735f80392e1e072295661b51891588a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:848bfd9c367ac4928617c253ce39de65a2b847986bf0367f48dfce65a0e402be
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6610a1d16476a9b31dd603122cdb3aa62ecc55a1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5599307fd7444f9676f80e10132bc554e1c10aefd5fe072c7dce083fb14c4984
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7150aaf4ade901c02f6122b0d0920d8c7175dab1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e7209c9af557aa86612476907bf0f3bf5d8a580439579eddf38bc5bd26a2bf7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b18ec3e21c987e6df0bcb9ae084701a309bd9e3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0997ec440ec2b5a8eadb85b36b7b3bbca509deb011be5f38d633883f0b00509e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cffdf968c318372a1f9f0da4c482651e1cb0146d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fca42198f4b29e46b1916b63fbb86a6d2d4e3a35d766185093ef4806ed98b731
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3260aa4df51d1d57a094615654da028f2babccc7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad5e7e8e7dde5ad2b26e01086b5f2cf5c2238dd11cee07bb4c7dd5b88f8e3584
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5ed8714a047fc9cd5c00538cec01dd0bd63ad7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a7d1bff872bae3af48db3751d23e683d43766bf8bcd87313af17d7aae071e52
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb467db124b295afc5a28f87ca437e04b86cc03e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e66be36e200e7324ea384c03c1d0ea6f68da18018e11e1b7b1af51bfccfac588
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..137cbb7860df0d2bd73b265f0f90752852e984d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47fd9249c632466ea4a4482a5a38243053040e62a932163176ee9a814fe4b3e2
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c8bb1bb2753658d7547ca88c6133118603ed7e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b93e7699f6f4fcdfc1428c31f311f160e56a0afe7fb689611279680023fa47eb
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b14278ebf38b631a13b64f8fde9cf26eb3ad9a58
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b528507bb490c5be68e047548373e3b63c675587f927481b7a70ec126684ea6
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5a0090b419509114ff29b3065af85ed8fa2d10e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f2cf0999cf8c6408b0a296faf1793a501a1438cb061e23b17b4c95bbc973413
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ebd34824a0d6faf2aee2907b931175e7eff6b2ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a390986a2c6cc6f29459713b0cc4b5e2f56e7936d1f8ee7c79bc6725ba503a5e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..356477081bd562a8a221a3e14060a47188718197
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecae61bc6b3314baa2f4e467a6b8163801ac2b94cb07f48c40371eda9d5ac071
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.37.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6bd1f1fe7a26f6e48eb06f26c11e8189b3b1ae5c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a7fdfc656e920bd0392fd754ffea42f0a4e172008fff9decb49f2db8d378d33
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.38.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..00b3c5bb3fd0266fd93a6f57d77b3b818e49e496
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f930a7bc0cf32ec9b9e8bce104aaa1cbaad46b7127ad568d22c266bae476fcf7
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.38.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.38.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c30a34140d3d38b143042895ef3d06a95c37d242
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58ccd68d48432ea03120e8b03b3de06af331149c58321beea7449bbba5438e82
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.38.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.38.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..618e7c2dc1ac0631ab66074535ecf1cd880ed4b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:679b8ec4b38ebad61e002fdbdb5274feff7d609cc9730a3999d209ab464ac1f4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..62904dc68e429dbfd480693b9bd5139a0267361e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a8569886cb6246d32f1322d58225131eede1eab5690e629c251f8694ca4d766
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6f83b6f8996b58752674f80f4d74bc6d4ae6ceb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b442c6dd0f5a6cabc9bb343b2a55a1dc414a02fc8d5094cf20e6fe59703f863b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ade9fbf5015964f2e5c598bf19cf118214df1ae4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49b21f6d40f59c4ab8383ee4a31fe2f9691e284c1d9aae3d83953cc2b9c0528b
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f09b746e798f382c18912246d0dbb881c35a8aa1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73450c5a0aa06b68e9a07f90dc3314d894afe78c9176be9bf16620221a6f6db5
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f21736071128182451b515faea4e45141c8e4e9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50b6bf270bb9471f176d1bd8678067b49038f2fe7d8ce0288964765534e5a68d
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1ff20a2741423ae9236320d0218324ee90197bc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5024ee9dfa723740c4242a88bb70149e57da17a4378c6cb2e44f9d61ccd8c5e2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a385d22c7d9b3049d62974d716063aeb12954a9e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b93ae6d6cfee264c66a5cf43e38ab19bc11723e878c9d29d744a2e2ebf32146c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5e2e9bb9b75a39670e9b6c293d5f012a6dc9ae22
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdf37839eb6d80f5bc847c7a8054812a80c3961bf198666d2616043b58486ef3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b772ad240eea56b913436b1dbd5a44abd000221
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a39aa27df89380674fb94736b656a3c658f2bd791b98198cc44ac75c41bd71d2
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..959fd9c3f682e386ed2f9481a05f41ee81c94f0d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:430aeb4722de1ee46c5ee1f55e0f06e31f6c4eab8da00ef40722f0a4ba46c861
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b15d6143bf1e6e1c0b16bf11ecf778b016fdbd47
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c8ff756027ab146265763b843dd93c9024e2c9bf70416a26acab7f355164f2a
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58e31b79ae3b95eec293988d3271fc962ec1c9fa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2526bd587d813ae2ed9d4a7dc9b88f5d04457ebedefb1a7607c5b4275bbef271
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eaedf7b06c20859dbbc3b7894579ced17e8d1ee0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a91ad1d2eac2d8c88e8ac7d47af0761aa347577f6041f4ee503ad2655824d02
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..909bf3827d685047732a3a3fef48e608e7d8d929
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13bc0107e7d4a1c566da8ba997ee04f36975b5f836bf465e7ded50e26630f599
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee9262eb2b71135a2ddffac9a29a267dac5e5592
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3dd7086d56a3c6016efda2d5b6c0ae2745d740174a74242551d145a94a6acb9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..49497942dc1555e43764242a3df1c620568238cb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:854044822c49faf760f02cf359e8c6eeaf4d7cd895888d0373c9c7a45481f1b4
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..68e54699c927f390fdb9bf50aae759a78b1d90d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3dfac741c5c3ff6ea0ef9e69ccedd0e90c3d9075c40494c4638bdc49ab2735a
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..734b826257070a86af2204aff0ccd61c16d46024
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69646cf092eeaac9c49fa57a5bda99cfd7e09f2642e4767380833ba66eaed585
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e3cf60856d2510e027154a8f6d8f3ed9704116de
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4fa667149a4ecd1f8c5f57a4f80409486f998f1a026502fee67e01a293a2f8f
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9553becd1ccdd2b02bad0158b0cbe47b15ecee47
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d041278884e89f65f2e78e188a4efb00c2f93eac3c2cf01c84e390c2d4cd9c1f
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d2819c2626037d85c369a5e76536d1dcc969ca79
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1e16e042043b2abdb3ebebfa956bdcb9f1bbd69604ac50d57d990693def4ca9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..57354457c9452ab1b9bbdc7fe449feadf9e659bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4be0527dc0096f6410b1d64c96e30c17e602346940aaf78b401240b3d6cb3b76
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e4721e5a142c7fc99b9b26a10151fe5beb9760f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abae6f8c74b56a5e9786ed4f85b5b562b0d240138106da44e7fe7233c5d764ed
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b93c429ac61895ac0ce8d674c08d282deea852b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:360c386bba38150b5d02618a3809a223fc3344e5c5ed2b11ef7ae0295297764b
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bd549694983ebc178a165c8e8e37c1ad02c5e842
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec7e6d9071fbfcd885aaf19b7f0b257d67914b1a4ac8393da15b4431ba7464b1
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04af0a1923ced24b4b81b6570e71414dc9780155
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5111587cb6d85b46c03f62b320d70f2b15333269b478d207e1ba5b7b59e72dd
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..759fc3c8edc61d4d65facf9831f774b3a8554c97
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ef5d09a71a4a3d314fa7508c4ca2c2d2cef6965ebc0f113dab1689a94dc399b
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2b6c167d44a7ece4de43cf0bbc0a0deeed1439ac
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9394eff91b8a2985c69c5f2a87f1a716add7a6deb26b190a4206a343e65f3d41
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8579d98f6393c5253e1cb9077f8c5f44b712c829
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf2222aa472a369a3c7df36973599458c2ba01c2dbfd1422a32ae93950e2aabb
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..feccaf661ab839538553cfbf5a5e3d890188e390
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f76a53c92fba29dc832f12dd3f7b80756c0151e8b91bc0f1a9b644a52fa54c0d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b728e187232fd73029ab603c4477e0328b12147e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4f097fa0a63642c05ae5735ee6d79ec500c02c7ef895a9396d387cbba18564c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c17d3fd22658697678a30465e23086cf2eb20a67
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bce66120d6dd0fd583e8cbeb30e9dc7d2822a4c69c196977cc04f598a04d3abd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..939387b8c6d255d36d5767f3f86206285d457ee3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab116d17d18b184cead4ad0b4684bbe33931768dcecd3135e9f49b6eae44f88b
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc4b5eeb50068aaaffa21bc2ce2a9875704decef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7453da5dbf26fda85c5b067862899c2865c51a5b2c4b3af647a6b200dd259e04
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d007eb43ee035ca7ec08bbb4fb6912054664797
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41b0818cae026fbffeac167d33089d41bcce9a56117dc6066b7580ed6a215df1
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9cb6d5f895dd0c7b165ca9d2be9a00dbd0c1d1b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbe67019f0d023d96dec7463350ff05d4762f376602cdfa7c4328379cb1d456e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5321cd4b308d55581a443dbf14646050afa62fb9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10608ae61b5d90a8bb40c009a8688e17e783310fa33304e8ca0987d49421e325
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6e3f79f077c1600d9448e86a1c6b2c13c5d44a7b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e37fa986a80ab3bfaed69203fbff3a2e69b5593e216c39e3651b4ab137d9c840
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..53e0ae0e678842f7b13c6fc3f4c030c014327778
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2892d24be79ef41693b819528e062b09b02bf36c5230999715823662418b60b4
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0801399e92df56b23108822138e6f0e04812082f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7542cea545abe010f93b369d5a019ecf09b11c8069ae2f44ea462090a54a9049
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..97b32475d819e1fdce75ceec8e4913281fdeeb89
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5ba1a854971e1f1bbefc771a2b0a1fa688fae8862a8585fea821abf1c6b9926
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..883857b4f88940b687ca716202f8896dbcb29b05
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ada0fdf692c56faec8fc317a86e0eb37671c665078da267d9ec37cfb6734d495
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5c9b889e24df99ae3ab437dc1d0c054ca8a09ca
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c1f8a17072944a596e73c7d257fadacb45a58a19b27eea826e38f9171304e44
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fcb2d0d72b9bb6e1a16336428291ec506c8fbaa5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cce59ca0c97847f1df77a9a288f6b8a7313e98c845b44a692a0a3ba312908fca
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5717b1fc971e8fade9f8b491a9a242a5bf8e8998
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16387ea7f41a79e05af4aadd4f05379939a67014cb1be16d4040a4c1fae6ef1f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20425c7a346907df5717245568042fcefcf88806
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea4f47d72bdb6c10fc46311a6b1ca1e88b747c8e188da020e3bdddf332255799
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04332be44aaac2f3140a1d17bb5fe3143f6c4f63
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e078b26cab3cdcd25319745969115e1b5e5b3590a6d4a0b30ef659462849f33d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bceed9b4c95f9ffb2f19ec5c6aad16d3f095b42b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd006d837d42943c20aa194f8828e0b876e85947ee41f98ae79fdf494a8f91aa
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..57bae6c4067686e6e8cb4652c43fb867c1aff216
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:235d0a726b3b1a5ff4eb068b320d9c466aa1b1e5c52978acfc5312e3ffb4a4cc
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a212c03351d9f8365b660c99ea99638a5ac6b58
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8becae76493ddab85ca28f33ffd283ef039a2aa1272c2f6e3f7294c2afd35a63
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0e4ec2dd75879e382075520c18c6dcde776ff3e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:199280292874387779627e2d08112aad1444609111c8dd1730fca26b60dea878
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f874d0b6ef357f5441920f4470d32da8409ed51a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:431b58ac51a2f265dfedee628ffb516bad1fc288747d2b3df0cf5fb31f747e7a
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba746818f52381e5774448d051cc0ed483b0700a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5cec2aa9dc5ad49afe68ed01d9aee7604db79c505e70939b143ace966fc7103
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a5def01f79dec1b61c5e7cf216a90dd048985125
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6756656c706ec43e901bb2d3cd0d588cf72eceb24521e20afe86ef54127f4b46
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4355eae1c9011a1860a9d9868f911f30fbfbf252
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d50b5d394e944003a23a7d37931aaba36ae70fe3253630a01fba22c78b22ba7
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..35206ceaae130517671529844fae47fcad534bc5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54b464f837c69b3ab95b3b048946d986901aaec10794f82794f2205aef52c95c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.38.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d8203cce7ae2e9fee8b14a4c742854d9ac3b63b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:501dbb396cc3343e6359ad3f961f8a0123fe4d23cee5beb0f5cfb7e04f37d475
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.39.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f2b75d5d838bde7571b32df11bb49b5cdf67768
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:338406759b5da6071e57af38142649c57c2ef9df60bf73e69a9d7e052f428483
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.39.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.39.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d42994050d5c71dff0ace6244b4b8180d50c1618
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f83e98d90cfb024dd43869263dd3f5269403bafd97c0adafe96d659954b32c01
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.39.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.39.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..19ca6ef4753b8c7b692cf1b97d2da7a5033b4273
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c74229f520c484128f4184127ab5ba99cd23c7204daeb6c5a518e4cfe53e60e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..debfd130f2050fe6fa95b6a474a05462781e9ec0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d24dd119d03e495aab100c396bca84fd5c90839262932f2348e84b23ec22e34
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f1f902e6cadd642a8ff20674f42c5bd7feea615
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9bebb6d67332110562d7e141026628d1b02245944248097e8c217c0477b08a6
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16c232aa6f1c176072502f419c84e4c504f44bd1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08057f94bc2be45f19760528faec24aae9d2a027052ea2251f92917082f56d5f
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf2ff37f0cea0a7b241763de5d99d07555afcecf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4033c7bba010fd80b28c1bd3a5271ee5f256984e4e8817f50639fd58e75e9786
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f776531775bdc587115e41b58d692face7579337
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d99b74df410a35222b6e923a97b0872d710e21497c9806e73a346f7c11f6e6e
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..96ee4069dc59a41e1f198bd67c379402daf62abf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b77f3f2488adaee64aa07c49a4f4f0e6a2ef5ebc30f6cfd5587f9fe05a951631
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1691d8141a4825459fc992e4eac8c4681570d6e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07e7bf946d39eef6274c97c70b7206a7466944d913177843a44923ae2bfe9c79
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46e2929271dd3aff443f12041beddf04aeeadba6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:880c23726c46b17954a3111b378d19b8a9f815d888f51f36bd1c29fd169fd7e8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d725ad106137f810e047fe5e90a2c01ce2b8d4b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a0968b6c5cb7212ff7448379be42ea4d51938c67fa6074c868fe6c0a39ff5e6
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2dc37b288d1aef4365f23bde7e975cbc4bb74fe3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4b811ae820885c80e931c7389e5b7562cc0250e9d6f44b8c09999e5c59069e2
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d69bac5b2ff92e2dd6fcb1a7afe645e9119e3e14
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c716fe09484f3ec9fbfe10dadefedcaf9eb9d1f59a36bc8e922cfcfbbe98009
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ad2e649ba58c61853f69aa0210b250a859aae399
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d942465ef90989e2fb1b25db547da65dcc9f2a76227e4a5e14e57ecd8ef8aee
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..be8c7ba8fb3b497ccfe1264a6569208dddcfa383
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ec2389fc5cffcc898e64d8a7ca6df8c321d1d68304808605cf702e806693009
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..03b9edd776a506d8431f1a5ee43d310b83f3b664
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7469428104b9d7bc37848bf48a97b1427271fe882235299c74171a56bd325638
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..846b8cad7a73c462d3bd945ae8194e2c22a76e6a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acce47bcd463c0706832ebd227c3c4aa233a597ddfa9bdaa3a20735d0d5f553d
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9c9839d30f25a3a82c36b0e301fcbeb7c9b99a4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5445e8ad629b9a13014617c762422b0f3aa4e8373808b7b53b27ebbc555f7a97
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b3dc1378f7b44abb686f89aa8c88d7a09715934
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39e1078e44a14d4d788d21b7cf2d1e84f8f4f7f697a08a7e69339c0a0702206d
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6dc5f88dc050d77ed63390cb4156e72b2d3ac117
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11bee38cb06556a92f958cf1d6d31d7aab54a34776e5944a2c8a2a1ce719ccea
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9f793bbc533fa2584d5f1763890be412f9afc4a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ef5469a81482bc68de33de47f211fbc8cdcf64f8bb1d2ffe190e58872eebe3e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc7b86ae2a3c5ee7432766b1ab8a2d23a2622aed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac35d0fa538382c468413142df6aa6ea6e568c44431c84984edf944bc61e83a1
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70f1dc288eb71bf0cdb541862cf035af16a4eb2f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b4620e3cb5251ed55ed38c621c1ba1c3b6d1ecfced219b85a9e026fe3598b0b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50cc708a1c9904f0b2e3a0a0a30800ad703893f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b8988f4a352e21b64e02a1a58f05af8e7ed19b50667273a314c6abb196a38bf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a9f7050333fb7a783f88ed6063dbb071ac34afa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d1f4945e9153013bd4eebd18363f6a66ad376a01f18a82cfcb7206adeb410a8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..18b6de241587caaf11912a7bee75969c218f01b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd06271ac92b2d31a7fb1227ba765330f8b662f206fa319b0ecf9cb4119cd0ce
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d441c5f624f52ded1b3e97f3834414f317fb0699
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77c1954815917e4b8edc8e98d65f89cb461c28f38e764b72cc828615af9253a4
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..efeb62f2a9dcd32bfcd5fb45a4745bd71caf5da2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0881f6e652bd3acb361f45e31c9832fc313de9647bfe60405fe7cedb3f6b2f1a
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5121d9bdfbdd3bf0a6d113550dbae7c9c978dec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:983183b5590d0eedd1f9370030fa17a2586810a581a8f4d65635873b2c0f3830
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04fab11ed3e79f891636272114b1a59fd2904264
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c201ec58d3924510e5e82a51605f54ae00751e8584fe4e76ef8c1f929b2a0a5d
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..873bf62cb78db7976a5bf394538d28413c7d1b7f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f41f6c4c5cb844bbb8b95923fe5fbac6afa4331f8379955ef71077b2bd82e465
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf54ca8033bb71cb4f864670dd6bba312e5562fd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c7ae576524390abdabce9b78074c9a5cbef496a1e3fa6b1ab9ccbe477c6b9ac
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa548b989fd36ed1ab80342b183d451e0674f73a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f75041c455fccca0138139ccf7b01b7c5527ac1600cd6fa54e7816e147a5cae2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6327c35686bbbbea812b7baf1aa4fafebfb8566d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca5682e0e3d03e1bdec37718c126649301cd52e0a4ef5bb7b33002aa5519c86e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1b36c12f812b30fc0606796922285a62d6bb0ae
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd8951f35d649505818f645f6da5b979073885398ec2c3704f416cbde77e44ec
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e22f6acdf479b8f9bc3e3dc04d3683e955722609
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0ca17cbdfbfcc1990e3b2a25e6b53e267056ea005f2f649d0976bdfc6c63869
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a76f2932681c306eebdc25dadd19988e93f09de
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef3f9aed79b0cd07d128e69e81d21920653a0e7688868abcd96cca21dadde924
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3e615f85f2515dfa9165dd6619978891d2ea7303
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:024cdee707da376e2783be351efb3b10ce821b6cfbb6c47a2bffeff1c4886b13
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c1b56cecbeb6b07d996844d671059ac166016f7f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2962ca2396ce4732081c0d36f49f9023a67d8793d7c45b5b1ff398a68431c3d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dd5a8a18c75f34a512a2bfa2f6b3ea24df222f93
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b3de56fd2429be167e1e75f3d44744b0fb408586a84d0a8bfcbb0da0cba40f4
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..26e23b372a91912a5d8fe23b50538606d5495267
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41c669054138e3b9976ce40c3c4e163753d4da93810841ec17eaf9cabfd7e86d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c2066e38c00d01767397289d77a4632d3c55cd9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ba2b0463b00c54c41ebcce64fe736decb5bd59673fb81162c357f74cace6411
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..569f1f0a019ca6bf2f00dd1ffe193d4856f8fe96
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15a66d0dede76d4dda8813cfbc52e935456df0a391072f4a96f759c9070a1a8c
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d5bdc268a8c09c6443a6e9e2afc04b89d54a8e7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3422aae141bbfb5446a7d23d43302a1623f64fa49f83bec3838793f2a6d87871
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9106815defdce6ba82004998d4b48c4bf5c28a81
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc9898e282aa6090a08a9ceabca16ab05e41dae5810c5a8ee53c6021a92e98f2
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d3179511fd017a2412a3da67cad997434f2e07e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddbf7c077b507af97e33ea716ff356a6c1e58f607ce43e27e1e1953aad449b5b
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..093e558c5702b309a6761be2c7db341d1e298d64
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4ee145f84034ac61e5dbb6a4803752202ce184a4faab5db6495a7be5c3a3334
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..adb3309d0a9f5cf100deeae073a97b455b9be6fa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5156c520b007bf7fb927f053a59d83b5a5f82efa3d6a2185eb67a39aa384a4b9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5227e015c369a6e665c2d4aa4e94470ae5d9817d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:079090f7771432a396f376af913826ff70b06c8a1ba95644f63b9cee433687ea
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5772e3700b03187fa7a318a678daf79f01297d52
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0025d7a4bd62f77e6afa3f611982ff869154c2a9c772d2766b882d1a6540e6f8
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1e21368366eab34d06862aaddc9cfd6f1c78073d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91e1973cabcaf223e42a02d6f04172a77882d0b57d7d936c80c50cb058e5960e
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2321e564aeb2b4db0968d7a59cb4bcef5451de7b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:722a2acaee2a48782d3363f39594907653e327e1bd46af9f1d2e9ac3de7f4dea
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c2fc035734e00de197e9249a0b38df5ce3dc7253
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dad00cd9a19f0d7cc2bc7eab393375025fb1e7bd457c382a948a41c9334a14fd
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b644b320ebca99615f11c94f3df0dc788a60489e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e244b5395b3a4760a05d4a1654b3e11534e2ee62a37ea70c7b95d05f04e20a5f
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d69eec0f7f6fc8a3c58985c2a63d7ecafbac051b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3dadb61463dcbe873565b5340a62581b80fda769edea4601c54231837be347b
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0826c778876eb9cb9e593f8c2539966029de96ab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc8848b2a421520ae9364b3c26a0f55d8a18bfe24c2804d64bfc4bfee2e8fb1b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..24780ea0e94781ca142c6374a8800b159e245de1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6de797dcba09646e13f6492882dfd5de79863316645ba14e65a9be27189aa11
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4100c87dd5e93cca24c908a663d836cfc7ec691c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95f08d1fbe4be45f147bd78ac870be22d4e800637eca5fa68d6cf37a82c9cf5f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.39.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1cab7a9a7d9e47bad63f70428a2b76f0b2c4304d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1a8334d2289598061db0f45defbee71ad0cab54a288d014bec048314a2f8a69
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.4.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..38a64b33ca1ad6fbc10102af714565c5015b0448
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:080a6996a354e5dc040261236c271e781eb7c52a22886a01fb9ee7089d6dd853
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.4.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.4.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54981ab04708634a6c8906f7631ef113f552847e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5419b4468d8c9c3c99ca36ab56f0502711e07c3f3708dd7faf68bc6c30d032b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.4.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.4.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e94b733b4648e9b90ae70f80b7283e1b49ed107
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8df95174a0506e210c65bc2daee3f47ae9d91974d6a593c3d14b4531f50d6986
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f244a8090b6b0c59528e1ab5eeb4dec5d70b94c6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58003a9142609a73b45da74d39db00903d865695d6a931ccc8b8e2c0fd3678eb
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8574059eb0e5bc4424bf786309b5aaa563e4ed20
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08b46207cf0ac8b70dbe08cb00614b08e2e55cdfdb288af3f7246462cba3b537
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ac96f060ddb0e2e497d8b5d4b23178906aa2230c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02f480cc3f6ce9f77b55ba8850fb1436ebd053c45b808dd1663f8544aad955c6
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e706870ab8f152ecfaa30e7a2294587df884e3b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5242f12e26a2e46690f93e2781fa5350c25298d85f3e8723f97c6d7e413ba2c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f0bcb5e67e3ea7ef70ddb454c2976d32c841c1c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b672c1cf9b3dafebfa20cdac148e4926cda5b6dc4e7e990fe8e55d1c71963be5
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0644121dbb0855558586d83146e61ff61891d8a3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb3673a87a3c754c674b8da71f6cb62f81c48d3f377470d1af70d0ad5d215c81
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7c2f26fca0581807fd58261b219226b0edd6d268
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e9c565c9c98d2d9db178edd8ab8f905fef60548182a22acceadf48569643d15
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..179db1cd3b16c0a9ee904ee9a6bbb7f8fad2f907
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea0f7f7b7c8743726e87cc6e38832ae6d70b48cb5687001fd9ed5e26721cab20
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a030dd94f19974f299ab58136767a90d4a71f0f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70a4e9ce01975ae56f7739dc85359c9b44ad0e0e9a6f623e07f40ecba6162e53
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..76267e52af55522df969ae0f17db8bb5d9d6712c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7968d2b66beaa3ab7f44bae010226fbcd3afde0a34bb00e55615cba2606a9466
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..87aabdb0d9cc5ab7082a7f3efea2265f922a68ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42a4ebe9a8694730dbc4ab6df128fa3d1f0674ab7e9542c4d1b34fa6036a1a47
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..750877a3ed39af9a6cc39d196d5acdea35cd896e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad46d18613215f5b18f721f53849b329901a6580ef680e4ea71d466bdaa529ea
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1ded2c4d02d26e0de79c46f5ef22e6625e3604c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ccd5dfa742322f45c3d4503715249c05b1a90fa7775bb241da7784be75f9fc
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d27d24f7ea625094b68ed84971805a9fb4916686
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f392091754ce4854d35222a60fd0ff451b9a05a20e1c8a0f0fd2dfd1a38ff307
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c1abcb58eb8a72d495caadc5125a8a9ccf5f14a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4058afcead6982629fe07371621cd30aade9b6300ed08b02323db1faf1d72d5c
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..63b2abeae41c456a72bbc1e636298649b4ec1b9c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:725345d674fdb1eae878dd5032130deef292286cd6f777c0d6397ed60e312ae5
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..638a78a6bc7f9c3fed67b45ffb4f25767e3cea07
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:900f423465205e7b6510f3da959f1c8b25370ddae46b993bc860f622e5bdb2b7
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c21f014bd445f0d0f700d3253a5705a1ad29e26e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff7342e24b6d119625050b1945ec0a270f6c71dc190facd1d2f0469addfdbf5a
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d59eb6a2750deb971239bcf90be1ea39fa0d824d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9041021fdaf320423b003450fe00fe3b36ecdd6cc39d52b50c27d9c328b8e02
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba5b6b81a562460158b4b2f9fd37399d179307ec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b647ba195707edd0d2d5afc3e4519de099032443f8cc7f6d30b81074b57b0baf
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e07ec6ae10321754da69791c825bfaed41341ae
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3522191011af8c7b6cf3d2fb336ab122dd82d99d755fb376a2cdc7e87daf73
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ceff39728d032410f0dec181410041fdbcbc4535
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebbf7b8b8f5c5c645a79733c412e2dbd6ccad60937baaae38a032d4b7b379d21
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..01893c922e2a04ee53c76a39019dede21141b1bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b1a05d7945dd5ccc6f6cf1fd6fda34156ef96a8cdd27c1c86827bde3fb0f6ab
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4826b360e99730fd5f34ac8f8bf19da271f72f47
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cca456e8248a674776eb0063c03e27c7c7e93aea7460194ac7fb06f6e3d962f
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a58d0712810849b91b78018cc88f42cc9c450254
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7817177587f487a6b49ed0354442f1864bc2650114f8d7e1b58bde89072c06a0
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a6d432f4ae3a5b0b5d03aaec7bd73e18b7e0f6d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b367bedf7e82bf9cc50022f1b4e140ac8ecdd4ef3ee0ce88ccadbf2ce5f81a4e
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..65ebcab204868a44199c681cf133b53ad661a866
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c037f45fa4c49c587a8345be6b242f1f69f7a90ccd4cda15a1d587db04b7a4c
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0827089f1cb3e5f59bb3672fe47aac8e8afa9261
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f5209d814d4974ad83ed35b87dbf7031ecfeb8351bcf3359883193d4a3f045b
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab8977b4afd526b7dc606c19daabbc01a3b3e66c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:230fb71ca1211b4396762d9c2521afc37795f1a8ad157a0d9c5c71f863044d37
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..23c29a5b94cbb70b82865f069fe2ff0a83e322a1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3070bc08537eb926d45396a52ddc38f1413dfa7abdb0a83604c4f598ad21adc
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..75472964188e9124532ec46e48f43ca600e33bc0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32419d7b3a8896058a65f0f6d0da413f9bcc5d56f8d64e83593514a6f1b04c9f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba353d06508e5a2282a7a250a2ec00a1da33b614
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5fbc67327339210361486e1d3c25738dcd83ec450962331d774b935573151d7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e757ca412e9bd0f004557a2a20fc7ce2f7a94f8c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03f55c0adcac710a446ff27286fcf165c5abbe3a33f4e64c362666746eec484e
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a26dc844e89960da85942241b94e80618091b432
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8df180ae8bf5898d45af17f2ebc4bb12eb8ebb6c7b1be50db19930425ce4ddeb
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a477578afeb0c11c0d2ff37ef37d87de13ac13bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65050e6e9aa27de3cd51fa48b5d377262a22af2ae0b5344e0420dfd02473ed1f
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f74324c38d41c45c26bb82013aae5e8b6c20e4f8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a60ab6f5e5f5e4d2b822ae701c1892c37d83669f169b0e6cbf3ed5827ccccf18
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..323d6888a9b7fc0feb9508c475a485335dbb1e0b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7c15a5bf32985ee27436b5e2106d9ce0491f6186262dcd756074b6200b59758
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1de5c8ac8a536a3c9f8691fb8a83d8145fda5855
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec2ce098fe5e583858155b7f206a0290a3b743340f6768b7d83eadc6c97fc6c1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..048261f3222a531d05763cc1b67bb4cf1982542e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abb07fe619783639282db65815b528bfcb9cca1b5cba34b4332df16f13b165ac
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73ac4a0d32339a5a18a419ad97cd26b4d23207de
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de71253bd0289b7c41c286bb8dc929f104d406b5d503202cec59305e61d08baa
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..860eed6e491d5c110462b45e10d49f8012bc7bd7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a9463234d255ffb798b92fd5cc0acd0280a800c1fd9c35014cdfe6b91aaafac
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a23c52f0aca63323bbe663dd46dcf23e0b806ed1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34fa5abbfa4d6f755069d98d39fe6454bc3a048eb9877a386ae94c5ff8a48843
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7f7e7e1fd5ec1c43f9f1dbae99a8069982edb81
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb0b9de101da703ba4b9bc55d15244b64967cc9b36657e211a64f956095ae38d
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cce9099c7006dce47f6a1352ec5510668c34d5e9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec769cf3c3d8088a4e37cd4206e34a7ba170ee31342cbbd4fc97db046a845a3c
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..efdef32872afb0ba2b02d105528f1b53e0a6f88c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3adfd2a3a0ede4e195b87720676841f76ab1cf1e94246df641a8f4996565c419
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..346a51cd5460f59a87556ad4d05ad4c9672c8bdb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c8c4ffebbe81ce42422e4a970bd49d2a4c5c9e0cec476c3fb93dabc0a3f1f9f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..893879a2071c6ab2e29ab02795dfa2ba2874bbff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d25c2f7e896af0f3917519372dea40c0763391fae9ff1f11a9a884fc662c9b67
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4de158aee6d62e9b3b06bd3b5c17c5d75511530f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3ff56b0386df62b47633bfb267129d5c6b5a06e7424f9cfb894d45728a7c89b
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b90b5c45a8ed58359b7f4392835d4c8f35cd4848
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58e111620d6fb2b500ed6b31f9243c17ebdb40e8fdc5e529ffb9d7013b76715b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c298e7c8427c13cf46a3516c33aa71a829d2da3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf0872efa49cc88622c18b871fe48bb77333ab5eadbd888612522fceb48b11d9
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0313acbbd80c3daab3efadfe571829b5686628e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a50d5e57bc30501045d5ded4cabe7086bded540eff084599ea779445af2fa68c
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02a362c1299bc93c4974e69a1a17bb84f5d016f3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d08ee991748a70559be5e21cf75aa6a90a82f82f260cdd28b97145914ea35174
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d2349b7822e0afd45e63cbe5835373712d4089e3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3ea0af94cd21aaa5b280f686bcd0111fb5703b697e9058fc4b141caeb21bced
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4562ae42006f3be5078e6f10b5aff36741ca3746
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee10680769928f1a1fd4b04caa494a778e70baf941c61970a3febea8e4d67c2f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2ff8c5541675fd7433d86d3bb28177beee41abf3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c7acb4482b3140d2d9d375f2d01b8b5dc803b2a4b8e693e82042ef8ea556dae
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d023f4ce47bacc6d73876e4f8287429d31e5326c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03a961f6e3204fa0f1d9efe309d212fd7d2562a307d5fe3090465bed44739494
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.4.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b51bd158653f906045fb4aafb8c2c5b6f5c777ed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3b1d76c84eb9f6c27ae0966a6d1350b350494c7b3f2fe06e67931652d444d4e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.40.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5018a39ebd90b32d813247163adf8d0d46424f4e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:431ad9c40c3abcdb0b3d0e1436627505eac768a6cb4b87c272f6140b49580598
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.40.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.40.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0641dcbbe54df25e7599d11a2bfb9401b1d86a7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7aa2e847ddf46cf4cc779bd19e854860b7d7728f2f47ef5b632a5210d95f7f13
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.40.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.40.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a7f923e2b58a751535155f740bcfe8e6beab3fd3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51824f470d33ac005dee337c0e46092c4cc4b0932d8030834c8953c85db5796b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..36b606b16cf5fac11893286a030e1b38aad03c80
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0aa20dce46c9c37152d32ab6a1cf56dd09bad78ba5ff37f9695b75fec5256ac
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ac708500b650677b1f3bf2042af01ddef15d05f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c73b500c67beaa954591b0366fc7e6247bd07f5e2a70fb82453b0fa6973ea7c2
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..60acaaaeb22335c84dc1df30e491065cc0bec082
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7236f70747cea02c62f92e98c602ffd015e08bcfd2ff04052eb772f2a1a8bb10
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a61cfdfdbf08703ff037a0a84c30bbb65fe9490
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f9c457dbc0c0a769ac68434e63b98d14f62a23f0cd09fd72fea8af55b48785f
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea3243e3bb12fc6b083891c2e5792df30b90c2be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a3ef92ac88cef950ce2f3f84ca45d53e89964f4164b612b4056ea3f4ed77604
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5fb19942288c6e3e32c0567654bf336e9393779
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4401e9b5702e0e3e75f15b8cd2f9d8d79f862a86d8ba912f05d537cc2fc69e6d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6bbbf86a4b2eb89624946bd24efe1905b886703a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36609b9787d4637e66449acc855bf0536d22e1787d2779e7fb5e7d44dc5c6801
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..93b9d0e4a56822787d2defc4716c5851fcc5653a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83a9b7a1bd62d3598230f092bfcf629113ca3de22cb09f2ba2824e6b37981782
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e21e40e8f50a0b79cd6fbccdd1ac72978fa4655
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3aa1e21afd3ffeefd83da6c2a9f7555801b1a0e7c5bc60bbf93e1fe856a424c6
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b178d064accf214eda68a33c9b377300d9f51ddc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e845907181eb8e5ce2bdda9efcd94f0fdc06f6d1a2166a737e7292cfb3bec4fd
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b271b9bc0921dd2e00c397511c8efccff557f9f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c82be20d6e59c1de817818a6983d727541ddbf2e7b80a1c88b0606a666079921
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2562ce6a48005631d0d0548a682cb0fe110b452
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1704d17f0f8af340cd4a990a21fbd20ffd87bc9a966f606fcd26d783d9eedd83
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..51b81809b8cfea0bc25a2b7db9395ada9bf9dfd4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21455bbe13f09ae82f6aff4c8e9cb10d3d35707cbdf61d3a399704d3c1e55291
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7846893218808f09a3dc723efad6403eebcbbed6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05e8b9cec44e78092f987e3bfb664ca7f671566dd4ad33f900535f0736b4fbee
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ffabec45f946423db60bc4692acd8b3bc005d98
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a938ec27245a11ed3f13d3da5d1696fdc7615aa09c97dd7dec00f79faeee171
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f6952faddc1ffd47ff2ca914cc3e94eb4540556d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1614c123e216260b0d63e1d7a3354a0ddb897549ab6b3ee3a27044aadfa5f920
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2dc908c5b4bf5854a6596ab186bc7ea8aaed5404
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e97950a6f1c72224ce19fbfd75ed92397f39a890bd23951ddf68e302e379dc8
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74b64a896a4ed1523693f5335dd24689780d5e6e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c0676145ed18544231ff0ee4ac53a50c3436416510908ab3d021acca6115d1d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d1e671cffcbeba408d315b7ea5e4ce695fe277bc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c3b62509cc09033692003d94418289283918d0f34fa415799bbc932ca70aa57
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0867d007e029d22d5ee2d757b688daba5f958d71
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7c2d8189706a5326f55cab161f5295431278c3b68b85c244ca94bf032720b33
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1fe7ebd3eb3c89717cea768caa9ba368fdd53a72
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad9ecb0387c86f454c1c16096f62a6504dd5a4abb34280d41b4b915838f4745a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a3d6dbd681740fc8c5c48e622345decc1bdba698
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfeb33cb7c4889924d4dc9e6ed71d8ab06732d9a1bb96da314fed0ffa075808e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5baf94870162103e9c7c545112fdffe463c1083c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a49d572a2f943168a4ee7b0e6be242087fbd46bf9032c6bba283ef3a15713fc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b445aea45eaa6aa2742ea7ca3ebb0c9252061d0c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c9d3ea6cd9a1735dccd892a43ea954800123920d646d725f5d4063b52ffd637
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..41fd8f0f5dc47542d9bab08c8c3c55e8c9eb72ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf0fb89a4da548ab478deb7b6ba111470b059e15d91c964b38aded3dd978d2bc
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74b50fcc3cda4f16745d74d2644c59f30378fa39
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28304aa7664bbf9e8780ac9ae9380cf1f4a80d8b96eda5eed2dcf56a04fd502a
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff36249088689d532b8d8bcb69498b4a3fefdedf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6b4749d9579f4450c7e5732879da92f4b693184c16e0f944c7f3bc89acf56bb
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6db0f291dbfaf6beab3b144b78efff237eb1d9f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17958d02274bfe8b6de62a40bf44f0806d609ede1609368c0b1e4b69b5396a7b
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..290b42969c770d0165466f585c5fea7f5aaa578f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b823c911cd524429ba3e805ed0eb0f843d60f6a230be8f7a51660b725ecf7f5
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2ca62f5045ec854af317314b9de7697e33fba25
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8db3bd5c7cc6bd41ec4935d97a1f1f83088fe9923ce163d82e0d394f67154cc2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa2dc35a8223095f3c7732bccb33b0075df8a420
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad42b5d481170382daf2d5c37f32edbe33aac8ab7d1637dc8f2582c8d0566e04
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92ecd8b12de3f6ad290fa85c172be262285e1718
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a97acfbb5004978e2f9829f60c52d11ddddf78411cac2f2c642ec7a8440e0061
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a0f8f400ca4182d3d5834073b660a06d4464f26f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3bf9c234210514a180a5a1bb369f7492951c6e0162ab1949d3ee1a7d628c351
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..436007cf5f8f766bc895746db07846422f552952
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a567ef13ddec485df81231a9721aa95724694fdf0e44c58e45404a8ea36bad61
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c3df3f1ac6746296834d550e10cd0467d03f418
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05688710c94690d0c44fef904f75c116c162d14cb1c02c975101fcec454474d9
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c492d5fd51e6b8d987c3dfdbcd937aa0dc740dcc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42a20376c399fb46536943f63d04859295b1957799e89517af2c7f04a295dc6c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92ac14f0920a3cbc7be7e71989bf0df54a7fdef4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93e88b0d7aad8045d772101948abcb73b666566293214029235fff4388b38dae
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bcc581232a36717b3d598f61f17702054ca86719
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f97bee85890082fcf5ee20888d6b1f32396f4d2ff4e4fcf36c415076699df926
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..227789fb2c11df20f20ae4d4eba8a13b165b3c11
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2da228c24c11c48793f7f68e1b6d26139d6dafa144475db613d5d72cbc55e30
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d850ee649b5c9fbf8f581871e28b8903e6d9340
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e1d644a94846c6e3ad44cce6410c8f1672f9bad99d5c9a83c1d43c3879107fa
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..875946c57e5b2a226c0b05dc7bbea51801562aa7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96d0f119ab169d376207165357e2cabc6bfb0245d96fde491e14660780569524
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf53b36cbfbbcd427a7055e5c12956ba8ff79283
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02d1dced937e4de2c584b6de72904946b10685d223e85d02aa9e8d333da90546
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7b458dc5eeda173716250fdddb62dc7d2acee146
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a24d1398cfc2952d8d087433b8634627a1fa42e923a1b1e89c346acbd1756bf
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e726ec826b4057ad2d10a6ee0990ea9f3f8758b5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:215a143ce3ee114841d7b1c93b2eeaa2ff6e59b3003de9d44fb9a6e3a3d1da3c
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e7c02bd6b8b1c2943e56050776096e1b3c0b86d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a36966053aff441be0c6b609f19e0d740121ad36c766d0abe1bc42fd1232429f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c1b26af36fad93f7c4294ca6c2fe043d4abfb60a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:059a92af6b19305846a7383f92bf8057da70115e09734e34e13889f9ef754e9b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eb6e4bc39aff08ed70d57ce18fd3dc4cd385dc64
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2851caad52dbcaf40cbb30d8a34a956a79bc6f0790572224c74f8d718643e45e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58ae12ac13b8947a3dcb5dd2bb2279b4bc49d0c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90623aa93526f15a4f270187bf35130fada988e36dbe8677ea117240d19a6f9d
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b543a81e121e792e24b54b1d0d5b4277ecfc80f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b51179571dfee2981704b359e4f965d7fca6c6b5bd3e5053a8221033ad48abff
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c2a508ec1a0347d223e2950a5dbdb72813287694
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4428912a5e0edd0c1bc093e4c41c3520c03bcb4cda61792c9abe4c8d2ad659b1
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..53adc6c926f2e2701ed85bd16999ca7b6203d58a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0a8692889525b73031638c54fec171db2ed24cc9ea15d82c1e512c6d5e6c6ed
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c14c845150ddd5151abe500b1d9c86a7371119c4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3dd2ddb2e5743ad3a914e213d9277ff1ae3bbb8e528ea6b4fa1c59ed40e34d0
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..55712318e301148b4ecd105b8310183f2478fd1c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b61fec9574a71a8e21819e9fb1af0c3a7f22a398dd431167be9e5012f91ad83
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b91a1ef77e71374cce2f07361b8f7f73eae9727
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd7ffee010d7be43c41cd5a1fbc86265c1c38cd0d843b2d6b634dbb236567856
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c84523cfabcdec5c78e22b9721cd3fc95523728c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52e421a71697b47b5a4ca72b1825ea44272d1a20bd94e3deea6985541ade495c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..348245a3acfed1780461fa66348441095644ffdd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:091bb51457805ac9a5b8178e69f5c0694cc07c45ca0aaf22af2190efce20a66b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.40.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..faacd608c78f4c2dd24adee8befb8aa105f87f6f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c71c83fd9c5cc706d4128daf5cc249945967936660478f6c2069390b546de92
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.41.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d35577b82360fa349f93a47c3d31e1756b70385
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d38f7eb66acc733c114ad7144a8a0cefdded703bf4ab8d30ca86df295202e1e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.41.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.41.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..adefac787a63a619aa6bdd9ff8f4345237314bc5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c4f70b3ef4348882df504d2b46b6f9bf35db0862173cc576284a1eac4f49d23
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.41.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.41.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eae0ca65c264fcc9cac0ad5a4fde076565f45724
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b38588ef307ef9ac533e35f99284acfa59d0180435bb78fa8f1e709bd74691d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..123aec11cfb6ab3c7c1b5a4fe4ee8981d36ef3d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e0643fd766234cdb7765956bea49e160e960782c869d1958fdd52a4bde9fcb5
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d21b91262322a40954d59d184854cb6aff419ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afcbb45bf9e3f37dc8ea457be89ec2df6fdfa9de647b0770307a7cbb1c8ab06c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7bfde051dc9ac860f4af763ac122583dedf659bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c516e23eff06a6ff5571dea919ff1656e88433f15059f29b8b8846cf09aec98c
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9bd9d9ee086ee3882be2ba870e0afe38e80006d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59de75eb0a08b5f212d4e7f575ecd51dcef3e49cfd2b441c4acf4b4496f046a0
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f695420651a0d3f4108f3c8d535e7f17a334876
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae8e6d02abd1aaf67aa49f77f79e0adfb8792860a7f79f1ef864b5611764fda0
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f594f3d1197f00f0aed3999bba9b431e80f62e2a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a6e2798b9d6f0f4f067dfb44c6eaa00d23f27f321f23774a97bda0e476f025
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..407c1e612fc88c4a36d0b1b72e083b186a142de6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a61d2ab1a7f68bc0d5f9df027924b0019e32050e1e3f3e8d0518e93cf73b07a2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73a6201a1155d5658a5b9f6362a0aa9b767c17e9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7ff45262908b5e7625c9a22d84f04f13eb55632c69a7c348f10e569125cd86f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..29f71eea3490ca268b00110a3272b3ea18edea66
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ba913d288dedc5e02778872fce25ccd9e75f47b8da0b72dbd2e6e0241ddb1d6
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3add5264236dd6a612fdf9ce77b40bf44a392900
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b84924fdb8905521786a09ec9fec5d5f8e61bdf982544a09e5cea2d03083275a
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b5103697a3059ecba59e6287e861abe00dac4cb0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28bce1f6c38ed5144a89e7feb7b42af465a6d3775d79f19a0f840e07558704ee
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9f5ca13fa0ec2c25c40806ad1ada7b1d1d5e27c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bf2349ed5ad183051632990039edec30c9fd42639b6f7c785a343c68dbeaa1a
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..28a613d8fe0839202b5e203173f30637699527ad
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f856ce6affbfaad4f693b0d7e0e86a5982af6e579743cc2577d8759e02444a8f
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8823e0fa11be896d4da18692d88e337f090ac36d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:537e48a0a1eb6fe19a9422017708e96dca56a5b5cad3495dc34d6498db068193
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a16f441545575aa224c7797502120fbe90da200a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5eb4adec3032db0d0b18b9d338e3578009138eb518223bb7674990aaa86eb5c5
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e5b7bb5d9ba7e6069fae0eee706e13c50579072
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:539fcc3719691b699b17f63876732e4edfc3bab27d8fc57cd6daf6edc94bc58a
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b79a6e7fafe3be0374b8718242b64d170480906f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c634e07c3971d2a08cca3160e8771f01d4393dae9aa0370ce24982dd92bf079
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4bc30cc15884242f29a8045150fec193c5f8ead4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0fcf0f98db7c3d7bf130c27e7b1ae5d461b2d3fcdadd99dfac009c1e7379802
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0e868dbf833328096365ab11180376223bd31f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:800bdabf9e0128dae9b8c9c40fb61be52c37d4acf8c14bf37cc110117b271901
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..14cab5b2ef2ae87c993bba3f75257668d8a9f1df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:458abc9b6f499eb8f5ba388df2df09b3a7897646ac9f6bbde099daad3d4aa039
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2249fdf7dd051316bf6d76d17b5fa7320c9ff14d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a242de2a84c7ed76358164e2ad917e07f00a846994181df0ac6c0e0d6e5b9f07
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22c1939859f3a71d8efe855069729e7c43f01ed8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e8c6702bcee0eb4a532b59a9859144fb21afff47319769e530dbba0465b5fca
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f28d045e96c2e68064ae032df3660f521e88591b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3948e19f5bea0a0a3ba2aa89463e1594a04e757f249dc407db8080f7f6cce563
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2bb9b635447cf30787ea460a0a52084bff37dc70
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e57fc28d1bfbd292cf4cf3033a9835c2a19af92d570fd2d7aa6ba85038db26cd
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4de3580ab9642205a45b3e7c4a3d1b6c8f6c49d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe91f9ee1cd118231a4c61ffc28b49d02be84eea9430c981b409b02fa3521f4b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de06b6209e91a735149134cae8f5fde8d76d6508
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41a89a0dcecd79613a61407716ef7327fe32bf7d5b7ae1443548f68e4f365178
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d875ae99f9c1714aa3b9378d13f17ed0611f6dca
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa1a609e276f5fe21b4a3f43b560de4e4d1a594e685df9167deab9636208846f
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1783c180b44d389119b65114e10b045547e87d8f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0fe6491a958a286ccd6f6020efbe54890aba5d329bdffcf820e3d3109cfb4d5
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c9801d0fcfa01704124e03cb8cf85fdf40b0a41
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9a4ffc161fea992f85bc24ca6b4aa19ce30ece4af0f6926236fb1bd5d76b1ff
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ca9a5233b7f6d657fed4b3a3e4d8547c447c512a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:714a039368803c23035100fabb972dbca15ea428c98ee3a96123e8f336cda2bd
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..deabde8300e209e042f112da4bb110a3f1d1d1f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a6986da5bad7acb01aa3f8b322f0254c9bbd285f3073c559f95968213d0f6e6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c9c27d75bd838ae4aa0ce1c251080d24b5d80368
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cabe77a20b010e269527a71d1557ab49c0a075a2c1c655f63ee2322f2be4d350
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..84598d2dce6d49e67835d042e641644f4db33a0c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:091e88b1d60ad51f5de4679902a63a28fe086aa5dd83d908d3f3c7ec11f4bd52
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..210cd30fa35b830c25fd5e3d3826f8b2802bae2a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:feff4892f8d9ba39f55684e0489f0910b34767c04ba3df06743d60fed2500070
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cc8c771ace273b0d5c2d47d18d05e054f4e437fd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:887f90114a58f15656ce37522daf2609cccb1769748fdbce4fe48c2a068a540e
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d259168469bac1673c9b0d7f00e9bd423d9fc7f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a36990ad1b2733c08f0a46b57ab7a09b79096a02dd2f47cad3b5bfd8ae33ab0
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..808dbf2ed47ed834e52119d008e5dc45cca46ab2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:839882bfa1c390d88c18f6b998be2e749a50e494e893c41ecabb8db631413306
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..28f396dfc7cf21f06c0efb5522bc193f33411ce5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0f1f16b2c28b228d475fdef1b66b777a326ad6cde6e368f81fa5c4c0490fd76
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d169e31a65c7dd7730a43c58ead644f2d4dcc9df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e6020186ee342c4b08eae148a4d84458398f859133877c44f0b8b8ad0f29b01
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ff1499e6b4e334bf32af67bb304596cc344a32d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d02dc38dc06240a6bb4ace374915cf3d9597d65e231a477189a7512b3a8ec428
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7db4a05c20e7d665fef9f4c459c188c364d6af86
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd47d27d0e5999b17b3eec5bec87bfdee53bd367026b214e9406910bb505b83f
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..383ae0c081cce160fd010dadb8d2bee89732db7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62ad9c252a541710c65ef317e10fab45560b8e7e47d4fa3ff7c33d6b7df05d53
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ccccbf0bd48b6bbe6b83a0d089d915cdc0838b91
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e87db21b4bae5d41ecfce831616fff2912e2c4a389d7b66c84a27cc15440c975
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d20696104293cece0ce5117e6e9549988e320e5c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2c4e98dbb2401e2ca89a5233e37573777af6fe4319a6270acef0b28ce53de09
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1488f29c49c6c98c15f125770b763303e09971f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f529f615ad05156765a2cf0db466af8b3f703f76c2c0356f8eb4b7ffbf4b2e44
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ef672c8ab0d2b19605b34da12920797b4f2d5ec4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84a293869a5a8b94e2918063fbe48f3b9204936b879007186d94874ec3473485
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3a4655e749ffe9fb1459ffdd84b228dde951cb6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a88be4527048d1ba0af1946978f35bc727bd805f9b4dd4089186d710ca0ebedc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f86a92d30405a1d26a6f3fc8f84a46dd79c096b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cde1d171cf579dae1adc86f4a9589ab89f2f41e502bac7defad47b1f46a65d5
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b9497936670bc65f83392ee46fece2663be9c927
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21234d7b27f2de5c7a564ec41a4b8cb0f57be4bc816900045d5bfd3b6de29345
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4db453d2c5e281a0ec647d53ff8864604d0440db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97739b684cb802982bf6f081290f7a6bb5bff52b5ae0b5372513b3d3e500f20f
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a31eb1d0014e387fa67e50d0a664b50f442301fe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:915a239ee1b5740a3718575f2253547229aa786479f793b8761cd2ff6d95b656
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6945ab9b24dc049237adbbd1cc49b3f7c39eec88
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42ef77c45340466a8023b8051c0777baff72cd806d07554db08249ff8b869c97
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3bbbffbd227de3adc10528b732cab2ae866d9710
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a58ceeba3342a2ce74d3178fa4aec8d1b2d823c9cb7a99899c903c51d4620da0
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1612df4a545d982f2878ecb755f2d209a242c24
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf139978a4ffc7596a6ca111ce17a7811985b0077c04b8d19fcd50bcc0ffbec9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74af20ea6a53d03691485e01cb0e36ff5c5be37c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:244f4d963ba72fb3d4d24e86f222b57043fa4c190aad51cb88745e240ba68de3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b571a3a2442f04f4a8ca28478c134b0de506e7e7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6461adb5a35f57c521c9988857415adf6e4a80746db6e0910a05cdb9c5a1fb8e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.41.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..012ddec9da346f08a5eea24b561fdcadcced59b5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5bd1c400a071800c5c2352c08dc0796bf80cd72ca5e6ccf2c947497c4f1b636
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.42.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f797d84af40dd5bcf3ac10816366903eb1f9954f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c764aadc8e4984498f0c04cbec0f3b7e462b877569ae7872899851fac61ed5c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.42.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.42.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce9602f9dd21292cfe7a401a9ba941f23fd810dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26cd7865126c0603cb50685c249a1a2dd09ab190b03d69e2a150626442a7ef3f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.42.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.42.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b674604420bf7b3ef13a60b14248b959c641329c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15bd3e80c7d92af0a2330bf3df30a8ed6020050bab141929b3e4f4a202bceb56
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04c780600cd2eed9b8e275e4c461963ba77ce13d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d238da1f1fba40336b1ce5533ac7e4009106b11bd9bd2d0fbc8847e9d421c77f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73acd7945a2d0b2c5f6ac237a133be0c86e0330c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38febdeae4b5b5cda2499ac91f501f96de5b2738f03ce65716337fa70d30bed8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74c8b556af04ae1ab55e0d693b8f2f3b4e3b0d2e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:079162492e32ccf7510fc38aef84fe99969ebb05079150d44ab39a231b120830
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2495e3903058c3628a2001cfb67c1732e4f08d5e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:932b92748173602104616c0c406475bc49bc60871d1f21583eef6f09144b3d36
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d89ba12b0033d40d36f03135372fd02b89b94edc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49fb99f96d078d88c0555858608a9f486d345e19634e2056ee4a4c3c1f365a7a
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6da2d5800d4c4303cded3ec3a4e0945c88828055
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9934f170556ad600d143742c57c965b07d25f0c503a54d74253f7d8858000e69
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c14c78197464ccdebfb69f3f39397a8ae533dbd7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33ab7d2d5a3c98dd59c3f488cab5069945e63bdf257c4ca5593e844a0c5dbcbf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..210b20068a8443316a40d3b8addb5aa9562d34b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eaa4fbfc301b403c03148d1fb367e8853861c7764ca1e781e540341174b52efe
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2ba2c21035cc37d12742f357e6272e67df6714b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8edde35d5fa4562977ee4ff77691ed88c1c1e4c07e948753dbcfcf49eef00f90
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c021c17a2f9007570efef154b5871c906931d45b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af0163232c9a0ac79c7ac7acd8e3f6205ed6cc19c54030076e1bdcd01411d4fa
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02d065eb742e3e46378e83fe6d367b786f60ad32
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be88049fd582fbf752a9595244af707b5e23ab108a740dec598216a0c3060f07
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ebecd6ed42dfa94be78bf10b219d61b3be09186b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a9662cafb43fbe1ea3dbb7513dd5a56fb7238a738b0327bb7a21fa911e17334
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7bc9d46dc67bfdeed98509a7fc1b5e4c1811e8ff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77b070641ffa0d7b4537f142cb20037ef6ccaee4e96c249d73ca0f1848ab63d5
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b834e779b809ad21e61e918448c36a7f814a078
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49e2893e9b2b95abc0df9eb94c8fa910d376a477a8c83048337462bcf09c4beb
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ffe6df06f22342134d073e88fe1f2a1c1cfd0bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f0789e948d9a515f882dfa45e4cbb20dbea7609abae775844197fe445dc961b
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..082bb414180c0dffd3ab72f13102e4c04c38152b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19259c660510fc64f88102f0f01325abf4abd844ad2bc7e9d832eaa5ad23cd02
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b3b8fc4e3aa43e0a5125fbd1ba7704e7177301c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21919fed7e4891696466f3a7969590e0f39a2fae1a473576660993d40e05f322
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b468a873d9ee049146bea2b6b27b9d24f6a68897
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f96452b058c0ab0fdc7f6355465e5de43891bf453400d1331849fab9403792e4
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b92bf872fe7ffdf531712862cc85eee1518856f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:163d0f1d16d9491229b3d4dd636f11938780f1883ca3d330b0ba8ebcc9a3ce8c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c2f0cbd6f8c9171a47fa8e18e76950a431f79d20
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7329e4b0f9edee272c695a85ed59e42e02fcebf00b8bfea77283b5bb1b54a8b5
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b17d675ee6142e6400db511b96113fbb8b3b960b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a7fdf92e2e6e8edf7d49a9f2a1f27e3ce5694126adb58eb952cb8cff5b48f76
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f34a94b1614cbb2a60260120f1736a386cf87d4d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb91400091b68ce7d2eff7bde2af19a3feb37f8ff8cad87d96d0b6429223b002
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e075e00d9ff0d2f320a475aef4436c8be076af4f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:baff5a19316a62bb65988ede92926b4e9354488926e91dec6ede85f1f5d9a811
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bd23494e346f03f6053185048ca4e17eb1032cb1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ea560b7a1567db6465062f96238961393c78aa51c4a5f48b21762df045ee19a
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99094e43361f93d74cd77422611b13e7f4b1c59f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:992939d432e9eff4620329ec1ec505960b7ede510b5fb0aa20c75e350d7b089b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..95f935595d0625b0bdbb80791ce7d19a87f7e275
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c135b10eb76ec11f4573685a387dc84dc59992a0fda31727473b34c512f3b07
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d05637cc63daf8dc7ffd2d48d6607e9f2ce652ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e3ca52072400dee1cd30965c92f3bc41f52d45421e83bd601125f401c914139
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5377fb715841f9acbeedb4f870d995fbd80a7278
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f9ec3dace4502c6d546a25f88141aeb9aa9bd2cd2d6a654038b8994344a7ae1
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..256c5dbb89d4ddf224fe9a262261823bff55b81d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd327b34d2136c5f8f045fb964fd2e5b770703d4fd037a70e8be90641288693a
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7462c470f703049d4ad04f4f5f844628bd3f0fd2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04259e3581cc97fe96a7f4961fff5a7fb068b1afa1d42da50fcd93c8c2ba37d0
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7d099a4d2bbbe8611a5d31e307614fb931690ca
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5efc09c51102c6d619fead40d7e07b16b1610fd4259f1d4e1231153928d26e6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b0d9fbad8cf4305c8777ca250c56c645e93d8e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:431a2243f75f23a88de7339c0d4dfb727aa7c670e1b4431d92e95f9ec2348310
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..859e2c315fb3a8a92f9e050ed26626cc4b7617aa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e342c3f44a62f0da2566c4f5aa531e07fc151c8fa7695e691eade7c5cd769262
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..325fe29c0ef268219b1571d5ae4e33042bf3e57f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:054d07fd337dfc90d7a6d8ffdc72a0512bcdafef606be4df1951c46fa63698ce
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..218a8d681a97fa249ce4123b313f4eae1d7c0b1f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e00d47ed99ee2ad849dd5dab54d91d1a00634419f2b675790523c3ad6fde32b
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8adc50cba37e130bf348786df5c48a26925f606
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2df90ce74ec08a64b041ea4b5d6e6656edaaf78473382f09db753c28542ecec4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b6d1202387bed8caccaf4f1d72cebe807fa56844
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:55996d4e08bf0cf6422e0d17f7946de4f07b459dd051d403ff42d2cf67839562
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..81598820156fb29d514ed185835943453f95fe29
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7086c4f69ddf245e3bfda4135f56db4053e673d514111372e2836940aab29df3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c03fa8528e9199da42ff621d1fb972767c3ae059
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:515154f25d6bd7f0792710ab0ade5b347b203f41f8e6fafb1ba7b8cf0db68ab0
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..78f3044fb576171c1a78b448479b61866331cf83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b2b81967aec8f7ad0babaa8d0f05f040a7bac72a018b4e0ff78222cdf75e505
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..83893617f9d68a17dbe641c24d2d49e85311b3a8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4d89434e80ba0d326361b98d198742a86a45249b20909e30da4df60a71e527d
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9e38994e7d98596c843a7c9665f46ec63b0a6761
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6030c3a8d6192f30cc0cffa013c9779ab25493144d06251321144cd181f7dbec
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fdeb7bf2c0b8883eb55413c90c214f0c0b6b024a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f8c9375e0265186a6997323f9677459d0126531d4e0bf0e441042d18be0d976
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f2264550f9a2125fed57a4719d0be6ccd4b450f3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:532153c6370e0c655037f67be823116128e81bafef1105c21571918d7d46d60a
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4155201e6ae1571a5df27c988686a15503dc7a15
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26bc7b6079d2259eaa922c30068bbcdc9060eb01f62b72ce5b607b385673b982
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..709b9ce20cd5a9d0fddd7e3f2a6e23b4e740ded7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c047f8322f67b87333fd9b0545313eb6fdc4f21b0b3b50e73d5a508f69b3663
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b4dc4603588da9712c683729db5363319b2460c7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d41295ae81669882018b94571d8baf93cc4f3308bbb0bf211b461f634495b75
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f22e53c3abe3ce0e7fccfb4a269eec80d47e3d8f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ab3a58b2d12f9f59841dce1e7d9f8fe2f5b0a423b474a0d774b419e4af73e0a
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..84a13f62b9a23dd13d2c74c2a820a642e57a56c1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfaeb746c6468ce077003da17896bdc7aadd91c7d1ceb7704e1e62f3d46da323
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d3da38b9ef83f898cfa3ab3d344d806505b1d7f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccd4abdf18504ebc8ab588339fc5351337d3ba6bc47aa6e3505cf42a53d38d98
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f7e4adf6ade8d3210958762017b33069306ad88
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9f1ae859b7bcd9dbebfad7afd060e521cdfb4bd6a64e1c9e3e07a9f52ad1afe
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e06110469e521de1096577daab9f636072140b0f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbdc3cb12cfb6af7adbeffbd879094814be9d43207ab9b1ae99106e15322a697
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59cc67e3ca1d93b165ac4ff4970ca3fcbe2971fa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:691e6e2e23e43d41eb904fbe348372aa2841da4d581040f07e4b06a14c637306
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c13dbf4920324abb6d0cccad05cbb8c494afbd22
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85f0ef4b51df634b38c1593d2b59c012b7764f02124dd70955279db895c632f1
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cad5c8f0f8f29d3be6997c7c6181fde7802c03bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc9c93a5a2d4e5aaf317fb772d32de72ba6f79d0dcf0574cfa3ebd4afee136d1
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e646a6e4a5aad8e417911e15b33f1f44e923769
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae484abfdb5d0d16f3e1c21755a2002d2be73a3904045fcd7a3c146da7794cb9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.42.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..712d146474394ba61a58506b175fc1e4de6d494c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dd944817df918e598354683b7ad69209a5cbe64c00bf57e3a7f575038b5cb34
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.43.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..494b4bfc5ca54faa4a0f708983e16aad63cb93c5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54a5143a007d9af475fbfa141191683a22d1a781c83b9826f006f30811553e56
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.43.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.43.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..82b0fa32827d867161a61c7db927fed92737066b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d648dac087ab43fade7912081f8666343d6af4a94be3bd7ed511d25b253c5f73
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.43.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.43.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f64338b0e2f48cf4b0439b624adb1b16e6a919c2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:710f7274c5293f186499ba9db514f27fff52295eafd75b6630e317248b9de90d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dc0be741a78b0fe05fe42ac42bf63dc1bf7c1fcf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ae9af0cbd2f573b2d963535d62d80559e5d6ce549e42d31c023d4746d76bf49
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1bb719f4c3825b30e1e6eb8fa858e66e597d2728
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a81c1ade2465369767a4f65360179b07ad025be38fc3eafe45549704bc69f7c5
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..259f715f949f01db9aabf59180c0edc3baa4e193
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2aff6546ca9d85d1d598e2cf6659cf14e1abaf6311ee92bde1161b6459e3c831
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d85762a17f090fc1478014a8eb0d2617626ae0db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:443ca48438be3a7732b499d5b3c7cd76558fffb360d1b9950537d669bbba64e3
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2b0f3663d9656382370ae4a00dbbf42d5b482a0d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67a093e80709d1a526cbf10b03116d5ff66d5351e4fa8f8c3ef85ac00cda39dd
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..506d6b2192dd8558db440a4c3892d35714fc8736
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:273454140bccd29ac50ee74ea442a8031741b6185dd89b135695f20e262b6d08
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..889ef12381a4a6ed5b542ec1ad346eacaea4a3c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51a105ce1d1f2f693dca160ea44d5b7187febee84170f83a6bf84b9df7af4669
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..adc1635a71b8c521452056edc0beaad8f0679752
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c8d6f073a95f0e1dc74578c465d1c9a8db6e478e00972d4c350369bbe1a5519
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dc81d3a1ddfe270a1d73706e9388a1f2d7c0a6d6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dced5b0f87b402f500ad32edb290f7496394b387eddade1e3a51fbd7deb41eb9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..972fbb71e700b18ae3614979ce3a47a66e7671c9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bc333d9b70eebc08badb2c8ceafea1216bd9e2a8853f1c58a54a823a0e6ec65
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5eecf9c8dbcc07ebcd85924cfc30eb591020a865
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:779e85629739bfb99ff75d2bac5cd05db73b3cfb6d2bac3ee588b02887c3dee2
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bd82b79ed8442ebc569538884d0dbedf9bc84b1d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f46409885c6302b333af197b5b3f270d98d5e768d8d676fc1394160b34a7321
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1fc62179e6a92b645b4153a71b304726ab4750e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d18e0f6c36710d379761f394377a010ce248e08624d972edf4a034af8df50e9
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72bb7185d962d8fdb26c6b02a3d335e5a20276bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a0c005ba2fb03d3aa98c68397603be89d3a97f67bbfec447f313cc778edf7c7
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7de4c75a8985103238a8db753ecde204838eb3a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b682423fa1897128db3229c2d62390e80cde7bc97df7ce10cc3585ac4ba36e26
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..35060a048ff00501f219e6960d897836b750dfe8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53db3e87f3314e5eca671268986ffb9069ea0d2b085a3a88d0fd78e581201ee2
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4de89bef218abf94bce57150987165d3d039d64c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cdb451f24d36e9bf11f56687a8a73b890059c8053fa01d499f2791886e3edce0
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a5cc00e5078ac9b878bb50e9d39eb93bf7254162
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:937f28fd888930e28d0dad5a1406b2e127d7dfb295252f6f43163acdf3b44d19
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6a0be55e62efcd5eb25ec840b7e5855f7f3ff41f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7b89251e25c0088474e35e70addfff21c26d59d8c230761d7ed39f43164d13e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c36d5ebfb96a6413cedaaee59b93f13a6b541399
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:906d4e46f5d27657c956ef84704cc5db3c7d72a6a8e78461bc58a37842549f63
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..90fd5a11c2247e755f02072854c234a1d268156a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24aff1ec995e6311d552f202bdabcf2e2ef0c3e7a9cb1c05a6dbc234c1c7f95c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2def91b7dc7613b95d88dea3fc154e8185a225af
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fc6f1c2a837f0688211d7674f42a46de22f17e30b20551cc99dc7dfef1178e3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0e6029908f2a750680fe54d5efa481ec801dbcc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfa0b4ccc7b1dcfdbdca234d25373e04cab2e6c7c2f65b841c3dfb133d6df6e4
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c8b1d61687e4101ead0a9a254fe80c6a034aea8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa6192d2785f17d6b7a5746dbb64b7cbdabf22e393de61ace59bc9eacc102601
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50d12fd24e334d15f5f8cc5a0a1c0a81dae9cf0a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81ffe1a498ee94f05cf958e7082f74b36efbbe6a539852e8cce6184285ec9997
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a21ee0ee0cb489ece98ec591813b756b7f1103f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6970a29762b043a9f5fb95fb6495813d885f061b8fd53396aa2e4ee3f522d1b9
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8356dcb20e3af8ffa864afbc91d3c516dee7e1fb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b0f66fbe4630088dc2c8ff39c1496d8e11b47cf33c720356ba5aff8aee67e3d
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..31e8ed7d2344603dda7a43bd34b3cd89f238173a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8549afa0f7d21b9d92d16969dd94411ed57716402133301ff1e83eb4eb6ca0ee
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c7b1169f2bf0be3c051cee0f3c6cb102402794ad
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:084638ace330abcb2848c73def82f6e522291dc7a7de97c03290f394802586ce
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4547ea74f80401a93b7ce77beb92e5c82fd76ec1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c94c62e860a409c27e5135e5e284d8f15338c07b644c37833be1cb169f6b8dd
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9670f60a8d4a1b06869c756aded1b74971e55fe4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24c34fd88358bbc959d310f05f7ce4ca1dcec0628b77cd711ab45220765d50f6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f15fe5e23922a87f74c60d62cbf4350c6bcedee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:742d1dc2ad09f2ceabaacbbeef2d45ae9b163d971201dd5d37aff32a4793cf14
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e22115e3aa90aae24890b3e4f7c9844e811c2f12
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ef871927bde2d6de8c4acde3913dc37871dbf8aaaab971830bd954ecaa70b99
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c4f0765fca1d714f8cb8e410a11b706e793a30b7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d94be47f8f81d3c0016faf892bc06bacb7aee99628bd8e64d06ebd79f67e2d4
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..14708431c1e20a3a4eb50b22d81053d98adfa528
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:635cf14f87c2a6445bab2c44768883d2d786412c3384acde4b4ac3be946cdec1
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7204a31b8a5fc2b1cb49d23d43cf33923dc65839
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e160f8473f569091d6b76a2cc3ece57a9c78f20cb37c2ec0a85098b924e5549c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..08f796dbaf0eec3404222d0ea3169f039984815e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5586e25293596ff978242a29214da8afae1f0c56e092c0aed50bba312e00c895
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d6c08f0a1d49cb8fb35506dc7678960884d7e2a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f28de83e3df81ae6302b62236995bde7f7a173cadc637d28be672e46cbf54ef4
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e66caf3c8eba9c07c41459424ff1ab38541e6a7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b112c83653c9692a17d93d97c86a334030b4e132274f2d523d9aa5a7594c5e6
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e471080ddcd4e538388a9f16fe77a65c96ece6f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f66e85acae524ce053ca83c69da310ec3db16b40cac8e7c9c9538193c89ef15c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c939ba42c4664e8b8201fbdbb6ded2a11e02720c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efda243dfaa3ad21a0ecc0115d869db9a8e7f53b4c899033bccb8bbb78cc4f0c
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a08f701c554bd34063db340efdccac0f0f6278f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3774e4f9dfa8316b8bab63514adb8b3086292f8547d65ed8394ae536066b4652
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8eb133c4dd136512e6d867f88ca17cc1ec0e4e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7069e0c8ea081d7cd19be4af785a4832f201abddc047f900497dc032192500a
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..52edaa8c1b69cace449d91dc34570be281412a08
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0720890df124aad77f5c5fe7dffa81d37b6f0dd6ee94c36124a441e69c08b3b
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d0eaca96d20d8483329f3805cc1101a771c9d9b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3104c4e2d93455e84dc8419b0b0bc3a4b40dffa5172896ce0d838df62f5bf7e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..51db08a296d3325d00faad294271f1e9b46f802c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b30b70b32f9ea99871d9009495bf90261159e836e22410559897db97955d9bfe
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..79b5f369b3f386cd235922f33a32aa3f66663fd8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38e1f2e417c231c24b99d317f7e12da0cac8a840e9aadd2301e3da3b4a23017f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fa6381a55d32383acbf02fdadb7771efa2ed2a0a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abf20daaed46882cf952516adb7ae98e39d9384755c001c254804dc8bfca8b44
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15abce73903a2a05d646631b4272406cb241e692
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87e77ca908f58ed4633d6b3452a6a9bef732760408fb84b65830f9551103b78f
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9fb3515c8d320e0d32ca487e694b858d58749462
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:276cebae278c82c2bbf3b70d1f9a3a90f8171ff577a4025819d087d86c6d98b8
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8571e6f5143d54075009642994a0474a8fd88a5b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:311be9063374c15e8793652a1f1f03849516c01dfa82368bc1e37d2cf19d1dd4
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b551e90ad11aadbaa6378cf7ed0e24cf6a6fadcb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9066eb4ddebbd58ca2d2dd2e98fb1e713ecf9cc099505731b346867a09d8b35b
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..586d6579d04a596cae115f24a24f3cc9cfa35747
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08da3dd363d41d7dab8b81f0b575a642dda4ee26a6f625af4e044b489035927e
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee68486d0a3deffe6976664f3717b14c4a70fd0e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:823107ac05390574e99196f974fd707c02c0b904086c90104378bd71cdc07de3
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..347e48b6d3afbed3ad9b9c26a6eb617f7b1b13f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7cd106f8b3067b2f0bf1b431f7d0cf3b5c6ef1effbe6325dccfa09a436d9f18
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..38e44f5f69650410e45a876ef74a724e577bbaa1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40e2de85ec382ff25483ab00cac150fb6fb6f4394b48d9bde25a9117687b0110
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.43.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..504519eec46b343fda727f1828e6416845e41bd2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d43d4e7c43ff049b5ed404480eba58f5ecc70c2a9c561982bc9a8bfb86b5aa1
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.44.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5e577ef07d4b9305ab04e6830515256dc6114b7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:450a874981e8ca40c38ec8084093ac8f3758e26fd16c137e74be9497ca1a846f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.44.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.44.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d62e455e093846906c8efe6e04d25b7d7d8befe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3065002da5a3f55d2e8896171ba4c1fb761f33e9301df34e5d704fb8552b5496
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.44.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.44.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..184b4205fcc3724b76a83285c8e9f8df8d0a4bd6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1335f138978f0638a4de10bc0a0250d3d74b5a481caf02fe5644d4f0776782e7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..324651af14323ffef3a330cca4b916847453b0eb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2677f7d724edd891369e2d5b514b0b69b42837db757751a23e3cd34b871fb729
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..15b59daecf51ca40f9938954a0f0c87f1ebc9a3a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:171d8bc54010a78abb851f19933b654fcb1b0c73192f8cbd5c432dd4b3b784f2
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb71f7b06a85bdcdae176a1fd9f53eaa6a5112df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94689764a9a90ee820f67a52b05ef8e98415b076cb4e897786f6b00b5ba6f827
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a2fee774ff4c12492bd7729e2dad374c4f188b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9898757fd6e8ea4871486a93bec837310a685cc00acf76c54202f8ec54f67e2
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..338637737885937b27953565eac800c298ceaa49
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87a81e4a20a9ed466aa93cbb34c27038ccfafae580ebc7aa7d954079ef55b641
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..065e020f8d60913ecb42c8be51c233289ecfad1f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de296d8c0d5165b071e64c4c6715a58fad039bcecdc94f5de0b674945b4b63f4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f1ebe126fff4934072b87ea74bd8802648afdba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:009f40e21ddc88f9512dc60b1287b8a746393d89a9019008d2409acd75b64a7d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..85bb33a7480dff1ec84c82a7f5aeb2fbedc871ff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da277df67f87618e63b34bf37b1df7b05b591709e6b25ec6966a81de91e1d0ed
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2a32ff4b3f7c2448ee1cfccec088469b5cb3505
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efc726c54d60b8e62d60200b9e41e30d7fde7780aef4bc292927bde4012add47
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c4925b539d979c8812f6892327a8b381f1207a51
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:520b937d7d21f5cdf8001bfab8d3d15e0fa56d03edd2d70d7be1de0d614d515f
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb2fc04795d1ca61759d5cbf24e9bfcd679e0dcc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b944a4a23b49dfa3b7c16d9a91d31471f6f2cfe386720ddad6056c5d5ebaef21
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3dada17b57f421f154d7ce84a1009c264aa778fd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89738006d20b52f8bb34aff9428282cdadf9704d4cf2ef23635e99bf23cca0ec
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b30866cab7a71a4c8a848d6e26019379b9dfa8d2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:564118e73b9376c29c6ca35801d125eb2f20b99b6af6fec54296c590a6786d92
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a8e5cdc583711cff627a227ee6d2d5f971074dd6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40dcd5276a599cfd23a0a97d237fdaff7f0d167507d5cf7e1fa003326d341da1
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0773b459db7491dd12bdf289a2425faa17060c4b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfaf8a8dc9d4e4e5d56abbb47cfe3469b4f48c1a54a821335ce2f7453f227278
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..69c4b15ee9bb32d79389f61bf5875875628ff56a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:feedb944773885346ae4aec391756bcf67c66900c55c3da7d5cb6809ff1d0fd7
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c2c3861530d63930d6f3a908dc5fa75500ae4b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5af6524423a9a8ff6bd3a5fd35eaed3087eece8802e3da0dd8810f829a4e4b0
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6f6549ca5259614c7687c816bca594ba2e5ccc4b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea70a072dd6551997ef892b22b87f1421cc9269f8a795f40403ed77d70121a84
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7608465adc0be8fa0d87e87f411b8a6b10efff27
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4f3b3a2bcc9527cbc763e08050d2774b3add33094685b7577ef63e57d310c12
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0aa7fa9e3a74027dc6f815e1cbeaa30210f653ab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae697fc10000282a5e7f3ad37664ab5e77d8da041cb70a0d6e75067d8f1da167
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7601bb231dd3030d264c2081a9b57b7e46b1c1dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec8680f42b39d7068579042398291ad4fdec57fb15d3039dd4243782191f9db6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a42a23425e3ac492def07115ce88f561bb1a04b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2451c19b27148ed6d26763fb53e5e9394ea275cdc42217d49434dbfca5c90f8d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d9097e6ddadfcc389081c2860a15b30c2662aa4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f41c4f8df7b658585f846f898847d35f96d2721fcbfdccbe5b5d63aa389ea52f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f3cf1e015d8954081f7496dc3c838d08ed0dfab2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf7c6116a7921ba7d8cb064f321053796706306393705685eda27194ba9752f4
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6af99e0c9bf96dea190b7e4cfc55a85268d5a70c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b21f6d7c7424af828bbfe4df822a35ef1d56eafed0711f4f6acafc4cdaa7d445
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..85c32ffe78eb67942a0c1b770d1691ba4d305c91
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec045eda75fd8c74d43e721e531d9dcc17054c6cbac56969aee9581169f6cffb
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..587a52d805c48b35a87fe00fc0a392ced913c249
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9de09511107696822c458290b4f2743d444b7fae1ae3cd627af6759366a6489f
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eaaa16264848a0120be5087dc08cd482d794a5f5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fce3c22e171b7b97446bd1ed2e3581dd280469ad6b7cf1f5ae099c0764a441fd
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8bd8f766313140bd6f041d1c496cb8ff6423b695
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57b8950f99e4e2970c94b38ca4dedaf0a2fd88918beb22d892ffd6ca333cebdf
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..047d36780f79f24329e11f724bf710f25b84cb2a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afdc4989fb0c1c708de6d4ad593bc4628a86663ea3344bae6bd60a219f483d6d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ebba87857b6c665c094540801a5fc01842775327
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed053bff4a8d45a61639401ebc470cc24856fec2f3f2f9a2e2e4b5d686cd2c76
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..97faff94e47e2ee663fba97cf730d44f40644619
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3424a535699f6a6e59869c3df90c8396a4be6fa01401c55933b07885b53580b5
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4e3389a1da4fe572915549a39a155f81ba0faa6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:baf249bca0f2c51869b70292071a489ec9fc53b55fd39c69e9e000dd907fb8dd
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a9bb39d91eb22a6f76a785034bd7e3c82df54ac
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:987f24b18a30a8b505b4b716528c3189915a516b055a4517657542324c0cd676
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eb364a3781a069aa35f1750e468c3c7846cea4eb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:135f9797fa39f3482fd2f3d52c8432548c0736e8d52343e1847bb33a47e73072
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab5314607c02f0effc9266d86924a5e1c940d1a4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:638dabf2ee42ae2877eb4bcc618b63ff03b9368e143b3f84a1cad42526029373
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff41a4dea6fec02e7491783975573c19a23e4bed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b55b35976d77ebd7a2f89aa3da4f764653a5a6c2a66017de8055bee8625a631e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..efff653d6541d9a908686ce7981c36ba5f040f4f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:355e9625b4683294f7b7bc32c884b22f9d931bb3e1e8a6b281bf70da764a0411
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1219c1773e4cc7235904376c6f276c1e886822f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ecc1d6f6147bcf6a555e658d3967bcddd8064d6873288b85f4f62e22d23829b
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6a2f4282c90bb44f010a59372334c65cced9b7b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aebe16ef261c3930b91910c09219e6dcf556094abe4e9a635f65c9b21cde0c96
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2e0c93dd2c3d88b7a7330fb02976aa25a844ee6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81790b8a6258066cce0f48ff383d9f53c4a740e2261141985b93768b9cb1d650
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c180dfcabec0b4ca620982c3fe5fc35bbbd9ca83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4a6d10d73f080bd173c490611e09481cf6f2e6d4882dcc020f5eda39e4df008
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f88d862ef0ca7f3154276325387f70bd8d4a36a1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93c350dc2432a9fe771bea9c7ac3b5537abc478f8a47adad9233206f1dafd4a0
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ccb2069f71291f2ea11d84f208a266c72a53371
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfb377db128a080e6675e80221f808c798057251e381a18dfd4e4803be179a0b
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b22f0873642f5a6ad1862b508809e78faeff37b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba2f12e3a1001366f30edad815efa6826d529d90672d0122d2c267ae610271bd
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e930b5dc783206de569bedab13e2d184988b73db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:261beae77525ea4bf5c617935cd58352c1d0948765f190f0c8de2130b841fd1e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7cdae6e8b64a39786df8a4d11d0600e6a29fdeb8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2d8b2ecf6b41c62d2cede4445f6ac6bde3c6acdb8c2f3272e7f8ecaeef39106
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f155582db9e8a50e00668ee36f414b69f156c28
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd26cfafefc0f88862a6410c0684242ed653428368e59b5825eb803aeac8c538
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..600909b16f4d62789db2ea0cad00a3611765921c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4418c9a60ea75be2ce36e01214997e05d40b46f375669aa53c2b2b77af82a11
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e0acf5fc2460f3c56e58daa40ce2a58e8c4951fe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50fed9be0093b8366bdeb1b2a6a4504e57f70076d637d093cbac04610631143f
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0fa0eddc838d5e19e78a5caf173ef2d2e366b6e4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ec649dbb936279aef033b473d4adcefbe177b7cbd92bfba47a5b54d08d46210
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6e0657e81901aae87ee55e28bf5c9404ac6e7bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a60289fd83cdfe2f3028e4d6a714adcd0aa4e90e22fbaf3ec14e1e794cd001b
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99399109f71af10df6cf3782a7b2e982b9182616
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3483d290e736ac1243141229b447ad99536db3183535bdaf904c52898e8e0ca7
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8b2a8b34ca8f9bb7bea6fa58fe6bbd697fb3d54f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7336ab5035e471bdcfa29e9ed2ac3d838e3c0269f940b3d0522a29081056ac34
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3567e1ec68a872694fcb508faeee6e1b0ccaa601
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:021d3c71c021e9bdbeb4d96acb75a905e78bc4118eeff4a2ff7defd3a2721275
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6edcfc79ca1b04249ff18bcf711cb83cf8359e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9a30706629513c6ed2579a247cd1af5ae90c993bab044a25d90c922cbefd980
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.44.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..34817396f390679e3fce03c32d04d160575c202d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f6a0f9e0db7aefbf17892c8570055ce0ec622021f46462855430966db45821e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.45.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c01f64731d28797059270c39097426241d6b701e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2eea3a1d60521a928c11f387fddef50ecec08dbdf5347187edcb01c02b3cc887
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.45.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.45.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..abcb05194f6c5f87d00ec7dd01afdb87f342de39
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24a7c53f178f72cb507b4141015764d617a560a3590a4a3424f66ee5e2f96f5e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.45.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.45.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c72f3651a82eb20aa3dff616007e0903e572553
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5746b97daa35736b44cfa628d4885d3dfd68a8aeaa01e4f59fdb1f616df7b98
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b6ee02437a015d33f5b079191ffd39e7e3fd75e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f928981c7837b6bc9b51e01551d67b9ba6a88db7bee879003b59380b501b0ccf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..40b7d447fd5c99449798860e86a76a3a015015aa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:070fad230afc0dd1e173ce2e6f1e86b3e4a25efdbfceb65383737348846aed24
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b79aff80eedc4643e2449b722856728ab1dc0fc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:437d9e03d93236140262df8700db48612b48d41d976b151df68854e2a5746142
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b8a2d48d4716ffb2adfd96143c7668a7f4c4f1b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdfcb76e22136f416fe7dd4a100f46aace82a8f35930359b3ea8367801b4e381
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b09724125bd8bb101f0b2bc73bc9c39f32eaac70
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24ae798af0d875ce3c5d392a55381c3faa076ee02aad4f493078a3a65da9b0c3
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..78ee94638ead63e1da22c544ddb87ce1a3dcf2e7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3662fc04d8af7346ba6fbd7119ce8299e99eb7ac2d59f04ea35f0ef4ad9033c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de321f5f4d031ea4d7443b0d052741016843368f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c454f3c1a9e19f07c35dc8301b0acdc4ddc983afaec65a90720fe2a2e730f53
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff74bbf70ee290bdf7e62f348b7126b42ee09a9d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d275556d953c73899138b87f19698cb357374db84b535024264d659c1ea3d77
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fcc19bde9b6997bf6ad12c779a539a44ab0a8307
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f0ab2e521cc06879325cf821288462ae12d4b33f9682579b8a5d7625e425dbe
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6e4023896365d591def31a958f770013ac6e046
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78965148f29cacbcdea988a00785ff4abdefb4f01af2e4f99a90aee9d722df5c
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1cc993536a7d1a3929043cb5097ea34582540e3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:442f26259258a2eeba006a4de8cc413c7aca715e9359ab3d8eb2dab70b32f2d1
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e15ae755466d14517a19107632ff7c3807180eb6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:feffdc901e9c0d283592b7e52186c6c2fe0bfb87a513de8e94d6f9efcef61bac
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..18b039d32106a8762f7a63b0aa438d63177f1f47
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b751430b8d0c6affc4f2cc2a60da82acfa43b4ec55e74db071cfc19c7e73c7f
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..88f8d92449cc9ec9f51b76f240137be76b6a9ae4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f662415458fa00368424499b2f257e4a47a40f4e244d1330f704eadfaa34933a
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0bd8eb7c08d380a44fbc092015c479d6c9d9177e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:337986e07394d42760654477572a3f26473cd0aae776bb26acdb22f83a4acea3
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46d0e5c260ff30223b0f38681ad04a3c0552bd9a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f488a810e8eb53f76c4bc8554284ffef01fb781a31d665895f6a86a4067d0bf8
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..81a1048fe583c71229720bd7639b9464464077f8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e930f2dbca301d033a3a4bef247c21aff50c4ccef77cf7833214a165802acd2
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7265c26ee562d0a1fea1877338ab4dc5ad3e09f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f36448ec7b26bb4d2c3adb6fae341f4b4cde66353982f1e282aab74fcb02478
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..358d00815deb39a4186719a6b3cf9b89f334397a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7a74d4478b5748f09ab3ceec732e4d05f846f7cd18ebc210869ec13794c69ea
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9585112e0ef2df9e3a01322ae24ea81b7596c200
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36a24cd7c199427d22c7cebc20bbd9fef96d7febd84fa5d85ba4e8e094e9d7b1
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8dbf61fefa548e12944e25a6a0425397392ac001
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0a69ec6b2ced7b6808c8b75f1ec1bc988c64bedf62344a8a06ae79cdc34b80f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a4cc6b4f31a08f55d52f7e566848a851d15ab60f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64771cf7745cf5777febb72ffe16e7f5e223926c902b37b3211c867023a4708c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc5aae8f2c625bafdeddcd1cdabd85fa05a32a23
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cf36d8fd1b4cdafcd825393aca6e9520cc31ab827765f8ab4ccc53071278412
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf3468526d6357493c9b30bd2dacdc2f8207e66d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:832f8197ef406e5cbc762ff22229c034ac97d75d37c0d4d5de1599780d04cfe6
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..63fc0c9772d3d67647762587c30402a6b7ea6e91
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70d2afac46b33d2998ed96b190a1752826990c17c0389d6cad1014f518d1b83f
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc9309b138c104285830089801dc85993e1ee309
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dc57ba749c9c3d3ab1d5cb82af7f45575e6fb27bad9b2076035d160c0c2a14c
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c19f19b523b5e5d91f3109a02c4ebeae116c5746
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fea40c1e05f83da540398093d9aed5feb70f5c924565e0569d07744779726f0
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ddc2d0e0543dd4b937e2f39690b90b61d94707f3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d2c1cb31b0a7a37c760fbedab84f725c529812dc4e6f4f1c8a762a81735e56a
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64df01ee18edfb4430a446d8ea450906fd954a63
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9130fe14e419af459b96d69c1aea558cd853094e66d8fc4d79aba23681d5038b
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed6fae260bf5c8ec9f07e9387dba1416643b8275
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8ba5fdba54ee24b8fbf0598b521c39c8e218790d6bf11e2928565c803f7758c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c51056a5facd17c6fdb864528c4a1521caf44749
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3b020b02e3d448e3da903e5628c48c66d7c9489f9b0e7da73b618b25341860e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c59560bb8f0797ce664a605a7cc6dee2efe1952
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13d92f4945e3b55e3f24ef306e1f254d597975a933c0a87cfce9c72d0b8c39e0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e59e462f13e502fd66136cff09ef694b94daab1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a73539d15da740726dd0210158643269503002d482cef165bf1c49fff2c3085b
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dccc1fbfae9ed96b2a5488a0c2799a590dd98f89
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3b07e8ebcba30686130e7f577f095ed0ceca1a34315d1ed8a61c6aa904ea361
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..27d23dbeb94988e9a854727de77e47ccece59c5e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b32ebce92fc79b1bdc4d848a1b9191cee50ddfb59468712866be35f752f39105
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..678b7420b1134c635cfd165a8540d3a050cc8ae0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c5f64a98317949995065a2ed1e2103a306e3af60fd0633e1222539e8096fa5c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a9c12cb8eba8ecd323537d0a14f0c38a40c7351
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc94fbddf45791a65cd01932e0c518886bf8d9c23191a02f85e91dea31460eec
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a17d179b440baa89f2f77e536e9840b698eae26
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be3b23e96a0fc51339a94e788a31f697659ae6b52a9e092fb480b10a4f6625dc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dfe793e10a2055f9bd45c2d3c40eb418b2d5d1c7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a743ab556a496f1796e5b51fbb7c9ee6209489617829718825242943d7a92329
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d8bae282ff9ce6fbfaa38419406518b59fbfc847
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3db44404be19ebaccfb28f0db3a4cd549b26e6916fd0295aa4dac5604fc2e7ce
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7215c7909a16f5e1846189a87807743fc2e6d2a2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64fa7225b32062e5a3bde2a01e634bb779cf2510f30361fe3d8d6b17f4cf3e22
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b614058a2770d3d0872c32f1addea8de8bf87d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e726cc6de570d7181b26c5a8924946ac5bbdf3c59e1e3fffbbd99ba03e0fe5b7
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..13689c8cfaed0cf398acec1b8ab7aafb7d18a7ca
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbf84a443e3ab43aec1f02fd0eee71dfb27f16b9a78239d33699d6bba9cebb66
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b8919d8012b53f9e4ae8718039581a3cdba40861
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbe34015788494a59d8500b64f53702db542fd16a71836d068ca1dd2dce3f85f
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8fda7bb06b94daa40d59569f178923cc62fbd9ff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6693a715305539fab3916074de45b9d321132d58845aebc8f5f5542752f80132
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f10751db13fbcc799ee9412fbe341a43dfa3291d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecf9e33336cde51fc0136e8addd1afa12b28358982a6f0cd97470d717d26a935
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6143a3c8a7954a5e14d331087b282164e9c7213
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86dd852b3e1c932a94db955775cc339b0b0c5e343c2c81e84dc86192e183e6c7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b33d54d8e1d2ccd7a9c699b0c10a99069b9ecc2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:949cd37c4aaa4010d6a9a6b8a5a43b9475a34fef8881192f36f595ab472bdb16
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8b550f6c729b0d7150abc5b7b211a790d42c2b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:074eb2e2160eb30941f79459d373bc64c821bfa29ede14592ab0ea40247eb2b6
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..93406337742dc53f7032eca680f276c0760aa3f8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6005020208c58fb3e6c5cd444af67cfb223fdd1733199e6e8b04a6f37cf72b
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8a896571b5c820ff0270468dfb0b4ccf9e97023
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:277b2ae01848d54fbea8648030433dde5cf91bdfbce4ab7952a5e53cf9f704c8
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d4cc18970a15ed45a931fe9300a010e6924e9d8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab4406c1aca4c46b0fadd4a40365a8c0fe4248c22ab96d1f9f410a6e042691f1
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ceffff3c505594c1d209cddce1b046ed1f57185f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e78cd60e939809c95cb7b4ecf9f4e1cde219e6724d0aac0dad62035ab8fad5bf
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a96d15dc05abbe2e33514ca39801728917e19292
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5e6f8298402bbabae85ac41ddfd17acea5c5057521cf834b088420b618ac706
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ba834a7373e7983827bdff3e210c0aa68ed59aed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e285767e5757a68c15601ead10ce52b8b2858f1b9ecc19b0c0cd914b9394b138
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b8011b7985914251099f361775f0f9cbbc2de63a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:be91a3c35687dfc58e4c77f5fb169343ebc99fa5f8a789975560bd31be15ce47
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.45.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d72f42ece6bb81a8da3b0992960adbb2c2d4f75
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91872fab2953a7c715bd6319dff53a494af36ae6ad358360dde5868d8766daaf
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.46.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..67c7b40d9ebfe9b0625198b107b3c566579a45e4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8f04ccef60d49528fea42986a27f6fed36bdb76fde29c3b5aeaede7cbb97bb6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.46.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.46.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..17fb5c6dc109a99ec437073c1ddb184818be3616
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dabffa589e225a87004259310f07f5c3b0957f614aadca9bd27edb240f2b2bdd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.46.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.46.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..83c64a2a480671eff466c474abe0ef53c7cec691
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f67a3dbf1a12fe44ed5a79aee3c0c253659110fce22e228f8c619b0faed21f33
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee1160a753c1be82d2777e5166fdffadd6396c80
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:273a948f7ab080426ab599862cb43d8a7512cd6283e2bd030923d19cbdcacfe3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f334e6f4a0af47db4b715cabe17a343cb4699c43
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f48cc7a9ae23e450e0ff5d4bc0a54ca267565a4a0d644d6cb55fcdb88df32971
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b78e017276125c9b56d393d24e63ba3835714a6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db44e17268dfc55313d85a933495764872733d2d6ef5ce83759c198f4604406a
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f27100bf2daa24b6e8b94612c73c845fe3744a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c93e6e7f7d0c32a97b0c80a1bfa4604fb2ff8a3ddc1ea42fc9348e442a084e4
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b101ca1c3ba75ecafd3f5ee8a40fe49e297c1e55
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aa08a97f98e100ef058cee0c3896e855611e576e1c59d99f1f63677cc018f7c
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54487229d035697bac85059dfdbebaaa5ae7de63
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5baf92d7fde1472b77d0cfce96c6a01c9fd04b9920922d7f49e17ae30fa15e17
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc222dd694c6594b022c2b82bd99c19ecd814526
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44c58f352b5ee1bc01c023ff3b32cbb2dc1e78d62da960d21b515d2010ab146d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..62ee16c064d17e03454f03bef32fe10307554173
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:384b36a0e83252c90d76e1be6d8d376f6e6b704531b72e72a267195eba680b4b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0295f2829d667f22ba8f69b61f10da6bbfd3eefe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c6f479e42be8b60c840d8c3d2f34a2ea8c7091f000938e69267eb952c42f984
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f3e080e00769e5d3b7127af21954eb36bd122d27
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72895a46aa6b7de861bfcd0afbcccf8481c5a550c99859c87394deaad1871a42
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3aff9e8f58815e6b0e5e64055f0a024d3241e9cc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fcdaf3d8a57d043424e5cfeed60651bf7dd446ab3895a191e93c0271841aade
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..71bb1556dc6e036c9015404b09d6d79ab7689521
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c42c0720fb2e90a8a397f0714904983d97c87d92419fef8b0708ca33f9128742
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..98f2f360e4255eeba72af4fc7bc757b8ae9b016b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8f218fa4479913c42c4b12df1ec40e49f9d5e66d4ef409cb0d496a7ea929f24
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..382762c1105f1b847d2647080cc69db72c07b5b6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86d31e8b795e24771e5f108809cdc21046729120bd158dd93c9a0759151f41f3
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e8cf7153b2747e1a20898a8fe8f0985bd0ddf6ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a040bf6a288c59355419c431d4c068416d8093629cd6a30ec744bd18211db2b
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..87ee5d656b555f449a3d2931ef1e2db1d71da815
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ccee63f3f4e1c9d08c2481fd84fd6f5c4b72e70741fd4956d0e7f95079c3ddd
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..505b1a676e60fce9c83e3368ef57a910db2dedd2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f40f0e8ff958ae64bf2b91f7e97289101cf15c9c15d294772cc88b33e7c3b74
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..412c85212af77f1573e5fce32e9c363a7ba971d7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcd35f3bd084bf363768ec5e0d101011dd2dd621fdc215ecd9251d17302a2c0a
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a6d772556f644c72e5fdb4cf95f541b282a30ce
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2251076da015c804caf319bd4733b89bde36e56f104f1762396a4099c96415fb
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f408bfa1e3d8c770bb441884e3f1403c358bfa3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d733aea50e0fccd87638e4ab66f3993aeaf8bb495a0d6f63e3a3183e71a1c9e
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c2a93e73e3a75ba538dbf58bd0f5176fb8f6642
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4907e157eec9cdf49d6dab8c71c2e0927a08c921b3c53c71e33e4c71f751aca1
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..29182b384b1df8bfd5cd566c1f844224d2a5c99f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5eac06e58e5200d799554cf9191d6041f0444a904ce59aaeeda662256254dd9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ca97eb51e66540e3861ee59bebaa50fb612cc992
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9f0ab7812d7a9c28c19c836856e334004e266ebdf56843ffca361749d5712b7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2b9a8df8b4cf8d24782d1990b4739ae91e91413
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5cd3a650e79ee39a6ef7124a543bd6bcc5aaeabe3afb032811aadafc21b5ad5
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2105bb92fc88aed13f9343830c219ef4f0d8f092
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:017849b7da2b5915f517a3c942ef1d694f032ef38d55f388017961cc2466f13f
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..14f578c0f9a9ababe7f52a7320f5c387ed5843d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4275b41fc5b60d5d8444a24d37b4a2934ce21ca8e170aad741ba2d6c7d95b6ad
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e26ad34775df503954aa168ab0720d94d108db64
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bd64043163f27f5bc12de76bc6d1a720708bd3bbe25c3e0140da9b49a5be0d4
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3556442bcd1389b42e81f23d7afb5e4ef95de2d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9c6cf94cfc85ecc813a08203cd8b8307ea90fbe71c3f31150fe50d75f99baf1
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a69f07102a4c6d80980ce1f8fb8054587213a5d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e199ab2e69cbea9da96a0a1cd6c1c758baacb58ac776a6db7025c8cab533c3f0
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..94025d556d8fd98e20384c8590865eaf6538c288
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c146e4a241d1205031f3822a2ae385c37419639845dac56b1165331063dce5a3
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..07f556cdd69fe391e4cd4bb584c1394b99813706
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e31f51f05429fa0dfbb1095187a131f80f1dc312266b3da0f13a714cd2a5409b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f476221d108459479b011157156c19faf9849643
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9fe46803a127c70fbc41c13ea4436e8c2f31528c3192cc3ec1752dfbd979c37
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..68515ec383886024d853658ed22bff6a35514a6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f18a29d53204f4aa96f6ffd8b8ed34e647fa5f9fd06c61e7dfae09a7788d9fe6
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..52afc26a942bcca71ec16e06f2c95b8cb86c2edb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aaba4f1a06a94d07c15082b79910aea8ade534d312e5850373ebc37611f376f4
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4da10178fb8870ca6fe0de2133ea015fe79b9082
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:206518aba2189dee3e8da5293aada92c28a666391e640b0355674ad66a1b7cc0
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f8c69da24fba63975521bdfdaaa2afe8a9b4a78
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29683b78ebd71352335a4732a9138d5e7631c1d8862d79f9dce0d3b7d4c9f96f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b070e1e393d90af1d308411930bea3c630764b2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ffd2c720f9431ccea2512a159f71c9e3fc769a0e99b4bac24c5febe35f0a0f6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bed242ac5cd12c9b2b0ab9d557adf69df483b5d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c1891f34b19acc6c8c207bc1aaab73931486fefdd4e081e91f6c4242a672d95
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc37d8ee913eb2902fd8dd99f647e4e84983cd43
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d3505124c6ce00d5e477c44deb4c31cb77d73ff50b5df969e59f7ec32b98b83
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e558b58aa36c62f2e0b9a8dde829b866e4676972
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac1bcdc95b0addccc307cde7f03ed34f0fcab5177533f9a7e04575bc40637949
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..229a7b8409de0cee84a13afd3a75486a30470cd6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76d0bc4f904de428ebf95cddfdaaf31fa7cc66817137a8dae5a91e54861fa5ce
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..180bf55fc9e8e9bd7afa907768ebb2ba76cfbbc0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6833bea88397b4f479d34107a63b59ec8d579c9da54b94244179ac26cea3ddc
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0f3bc511c7efb2728d298890766ab73a5695528e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e72e1160c9eb88570119157b87415ebb6cdd40a07a3b1a1371685331ea76c617
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..483d69c9329e83417c0468cc588f9355693524a2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efda90c6fece83d877beef1a0986aaa74afc0b407a5cd9803f95795aae0ac9f9
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e094754314a52286acde39fe21e70242c4540a75
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c92230f5962db2196e739c9f4ebacefba2724557c565284a653672ef558058b8
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5e6a9fa12a0c8d8d648f396ba1d037cc6b994a8f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ec2f9e54c4df9fb4482a683fa507c76fb8c5405e7d22f5e93108ad9fca4c4e9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce12cce0ed5cbb5df1b3631b8973bac5019e4b1c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73b390429567fa77ce76e1dec7f27d78af04a0180acae54383ed847d1250c2d9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ce69de3a6acbc8cd3f98714216df83daadc7f2d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cabe47564dd6a6b63f1e7d25e2794d9310a80908dcb05000501acc5ef909053c
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0727ee6603e031dded31d397c11e3f7b3edc161f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbad2fefde76600632bfe2a6398b890caf13998f2b35be4d96156ffb75018afd
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..da0af848efc44484bdce2c5ade9d931e0d4d1139
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:961e74baa61a3accb2c3b5b74a3acc544966a2e90e83d76baf93fb4dad5ee480
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cad96eed3a025c98c401ee30fa664c4ec50c071a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a241dadc9791eba728cc8d0ce2ef7cbacf94f9d4daf5d23327cfbb4ed323298
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..417ce8286a2259d91cb30da7b554d5e3fa4084d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a82600ac09882ef8536386aed5a48e65fe592b53ca75b00a2c192f5c2461f06
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e81cf75fcddd2a2bd7d572372a142b82f4e6752c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6326e23df3b1ea7fad99f459c611563a53602f81d0eaf8c02362d25c89ed87c4
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c4e4af8708ecf1e31f0091fba417824c516c58f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7691980065b0795fcbbe0b6bfd8709e809d3eef905e822b3ee3fd7f35294df56
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a806014968eb01ff936863fd114c48631e4f2065
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9823179ce46736ae800eb5d44c3a8065f5917c014f0a84d80a9cd3453455af1e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4aacd9cfe122458699ad17da4abc54ad447a1bce
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10b92fd711bf714ab0083d9ef7a89d5d5ee6c64282e74a4d92e9e69f84678773
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.46.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a271261ea637405ebb88f8c0a5802e79ea98d8fa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfb5dc2e4ec622549289307ce8b2d6997b1c7d37ba7f6dae0eac5bbab0545b59
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.47.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ce6fde0f1dc260701c36c6323b0b57bf0d94550
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b4c1cf094d7733328afb9714f91609dfa7903ed4fa4ede44e80a81b6a9c0817
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.47.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.47.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab43f3a2a4b4e53d0c5fb7dd73a5e12492cafb16
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5088e0d7ad28ca056185a36c62107074daaa7619fe2eefc699ad106aa2cce2f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.47.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.47.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..af4093daadfd23d8974e418531c0d4f2f87ac0d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d928eccc706808f15d68588b4a958e3c79c1578e472215a60b455a84504d71aa
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2369903933089dd0e27df90900b058baf1f04b57
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3f7800c5e8283a1d78a0875fd8b70e9f3d499897849241b91f767bc958e0fc6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8ccc20b22da49aa50597ecb6e50f20588297de14
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afc9efbf8282e53c848a41894db9b7fbfa7d529d4bffe73d25e040579aaba063
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ebfe74c33f312467f6ec0bdd690c2d123d902b10
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c76c0a860c8bfb1c38c5c5bf0e4f8d4de5ef8a6e6722ce209692851cac8f1ab
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e0b165fc271d9f8a289e6aa8c543aebbc720f23
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ee9bc91e85eacc1a9cf7d2d450ea0a4b1b47e6194f03b27e95abe4a80f895d0
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f298c38028f90f27f71815259ed7bdf50da565f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1a91b6a1315ff320f90bb11015d1ea0bc49669db9274a860616a0b35bf4ffe4
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..68db557ae38291cd43dd39a8c2dea7dc22f5f3dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4aa65fc59f25ec4f5fd696be86a25c96b997a7c1ce85cb1d5581a487de971c77
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c72384e3e8f405e65643c7b5d00a66fa93b3d82
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97ef0ff5b36793215ad358de12e854cfdb6ad4d58cd85b2f363feab3e6c2a937
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb4e0629254530c152ccaa0139c38cd470adedb9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:684fd75da84eb6cd9e7a847476ea93b0f430de216bb1b56fbe5f9b58d11db1bb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..03e9fc669a3a0384203c9ecaa4e87678f5a1771d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe2d4a2f1d4c97a314a5375358bff7b540b2fe193833d06beeb591fe3678aa08
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a727229d8919de63b62434c22e366819f70900a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60bf81d37a3f1d5d1b1eb6e5439a14141d2714d707757db50b3f742d3263ccac
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1cf86fcac254a45ecab256db77914b6cbba4c569
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:516f6f8b347b5612b30e13dd74f8be50296ae2f00d5856bb41ed1df5b6fc3cbf
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4bdb485ca7bf41b6bb67ef85302d5cf1aa373c31
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5195e21e8d58096c5ec9220d86a00d06b95923426f08eae3cd66624e1bd234a9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4279c2b2dd5ca30336fdfbab78534fd26c898add
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b04550ef6b610b80fc7937e79abd8be4b59a2ab31410e6caa6f35cdd77813dc
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b32b3bf53a78a408d3775d029a3b5f47a2d571b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf86fd60e331a20ee8152dfadd0611c0f5f70a30a17c762e449f944a7b7f84e8
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a58af713da181c5539f3a3ccf42605db1ad8ac6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:254e8e016810ff609454e6f4adff1b698b3194b401f645f34ace7887e4067bde
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e27aae9a10bcdf4c7823bfc6fa22df48c832003f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6024bc775a0ece8ea051011946c63b5f55e4fda756c2570817b3c1c5a6ceab0c
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1362dbd4a79ba2d56566dd59717d9532c8a6aed9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64c06f28b1147c05e3e5cf3986e7e15f4ea98d89c0ea012258281e8b74dcb31f
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9823023178764bf8441f2919f034bb8fb3e4d436
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a4087547145d7a0abd88f2cf775e452ba03f6a9603e415c2b38a528309a998e
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..42a4d7d0e3b8456fa7a43a89c4f5feffde4efc9d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ce5194ce80f4e43fb8ab11f159dccf79bf225ed0c118d992188210d37a1f121
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..95626a56b0ad6658083d3f5feacb59d495888ce2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35446b089b7d45e30a7900124748053c9b4f607fb74782ce45501f9bdef87c56
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee0ea39508446cee893e09b026d6f85d274834fa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:240314a1fa5ebe069e4a2d08ecf39a5489c399079155b9225a5b44200a3dd22f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..17dee1ce5fd8e49fa296021ac43138ecee444168
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfeb15d9080361cdfff5492b780de98a4be07abe4af11ae4212c0c05d867587f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..49d5198b3854aa3a3f4f86d7f5981b44b9523171
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2c1687cc1409c744bc3af5901b8fe7cf1c542fab983b7f539278989adefdcf2
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c321fda36621955cebc5f40e23895dd2abe5040
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d25751fec3976f91c46ae7c9412c6d53739a8e289663fdfba0657ee12a928fb
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..980e1032cb383f650bfca10a56c1cf9cce8872a4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:868c012616f7d91590c9173ef87c3023efe654d8d7b809be33af5b7671cff2b2
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6e28e9542e7cce9534e51831cb33ff9d0b919bdd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dbeb948a522b6015c4cf9ad03ad76e2030d49b6dfe273ac542e693cd9cde308
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a728fa6e0406f2a3ac1cc444b4d7012455910ce
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aef08346df72778060b96846fc6716fa22c907c13643ccf384a0bfe2e9c6edda
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dc530ae1fafd218f7e5b82ac06559bf89da57942
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:596cc4d4b777ec10ffb239dc72704792c0bbe08c37473a12058289a0c1254780
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0730207f989ebb2ba61be1ee309e523dccee0e34
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:853f4ec19ed58488419f8ae12be460c4e20f73c7126d7c98544b127ed702b27b
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..159cfcb531ee18725046d51dd2ba6a1ae99e0dcc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:497509b5e27ab0425e149859d34d73458fecb8ee7c1c4ba05c04fa5f61825217
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7da86a407778cbf1c89783ec355e08a70e76ee1f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46d6b13b60a73892579d08470019936325062956911affe52a6de3af628c37b8
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..debdd30652d1dc6c610d1ea3961c5e260d5a39c3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a5321c8b6f2e0c44d3197173ed09ab7176a8dbccf2523b1648369b4a26bec10
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20dca8c452f86025a1d8eec33692068987e3e411
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb9b095b8500df34e391aac7bc507f9cbeaca2eaee5949767d05a8ed4e3a284e
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f1b7b00fe907469714d1c88458211eead35acbf8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d40062933a3d3ca2edc4bbbb88fa10eff475069aed186db3380fed37f36bc1b
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64641ba12141ff7738baee57114d0183a9e669de
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e226663c90ecc7def129cf6a5f225ff2fa4ee11609ad94d55225e26e04a81945
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1bb02c9b194ca23e8422a56450b29e71b265cf5d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45856fa207ae44cd2a3a681781d01cb196dc602fbc6ef12909c9151fb7a47a83
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..594b11933f56f5e9cba749bdb445454dba72afb5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fdf3c54609385ee59112e1496670ccbbab40610bc2437bc169b25a3cb7d2c5d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f2b3529217a79b0dae13cb6a609e371244216f67
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c691b0eb8be852eb38d310f206eb0e89a304048c796b41cda77b8e6e32005107
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..100ca212b449cc5269b744009d91ffc1e34eacd8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efb6908c628c94cd24cb1802aba395b719cb4efe1200f2b7804f6e912d161b9a
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..147126532acba8b2b9efacd5495cd2afbd73c40a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c126a7daebd224c47c796abe5360850805c8b74d9bfa999f68cb00731440b53d
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f35d406accbd38d6148395e0a2ee9b1285798add
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cae93bc2d85e3a534eef03ca9e20cbbee31b62c3a8bcb813f282f7b621b7e235
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5fbfe95c742c4bdd896262007736b693d7a491a7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74a4de952824464cffdf059807b2979faefc909b20c63a60eb774cbb8e9b22e9
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5bfc6d0562f0118a63f6924b6af157465e9d9b19
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2a2ef321e344c4dcc1301051038dfb6981a69b388957b48beeb93f7279f1efa
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..242c1d46bcc2865f8560072e9b211fb899db1fd6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce38e0b24c4dbd4f9c53b1a9787c97b58b870348bbb8d340d4ed4d9949cc39fb
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c3cdf55f3a45c9c0203be6eea1efaa16fedb688f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8f0b78924fd3092d5ce65c3f7a3d092c1b6b7d98168b351dc3521db04f5f4a9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1d81af62cf8d5fbb14b001e01e9dbc1033ceed0a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb31e5ef94d060b360619a21154ab0cafaaf915da42d12d4f4e7ee2038f8aa07
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf7f030e04cd240dc9c08dc1f0a9b81ab2067239
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe0ab421d960d6bdb01ed81aaa94d1c5c435c0ff68bc658d4e99140a696f5ba1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bee11a2b892a70bef7be4c8f022d105bd0941205
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d479ab087a70c6701118d26232312408570900ec3160ceb67f4155b22fd61b94
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e16d515f26f8167ccbe6fa38b50f7f47e92c41bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:407f4120c47bb7a1003e43bd10f6636366502323e43d088387b911a34c3aaf0c
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de23926be19ebaf1b40f9251bc2a44431ca38e71
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ff533c3fb3bb7dd3868a553f0e206a94e0822df36969bb14ac77e63e90ccccc
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f916dc73b898dc538146553789d42f041cc013d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25dac4e35659d2b6e77e4757d1698a41ac3d7b163ca3123899becb77f2074ab6
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50e2234445d6ca80ac443f16855e67cbabf629f3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78f66dc3bb68f7e5b9a1f1694693d36f76bd66390dd7380849840206780160f0
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f57b0115ba73e412ef506f96b3d0293e1114290d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:030a0bae892e43fdc6bdca2b212c9611933b7cc9ebef49f016caa645361a07cf
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..84dc57c85f353ac8b66372b1f686a91fe80cd921
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6197cc7f24edbd821b323e66ebda9b4f7ca28b835960568635b790bb99fd6b62
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d68549dc93a910e3eaf0a38896d094ccf27d448
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:121652dc9128c9e1fa41db42785a2e113bd92c42012c50aacaf0258b74199164
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fbb3a5c0f2523c801d43cbea367cfc32b653a65f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d68509a12ba07a4a6aef4ea0234e0d4e2ce17f50911766b524641eaab9910fcb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.47.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..635f208a34cbe935d1e6e31936f59b8ff47b3bba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de454fbab2233be50c44c89d7f54264bf90e56c8a0afe10ba6cc0347a5e09834
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.48.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1c6e67dc29497147243290bd2d784c92917aeb7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e08b9ccfc8852ae5a39cc31279d53563b9752840ecb5f71c9baba5590e31ef7
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.48.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.48.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a8e27703ddc7318c84134e159f72ab3259e799c1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:867ad42444790aaa07e5444d989636267e33f5a10d5d66b634f8254546371ea9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.48.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.48.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eb94a91a25bd06929a7f51aeef4d1c9113d98e46
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46ce402a5e946b9c9ea795a4206202ce65cba660f065698b3e3a52b7ae76ebad
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7de4f01ac99982b93bff13bf9a6ed5763e4f345a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bfff8f6c2f9285b18bba1bd53e30fdeee35e1059a0527f85feb3ea1befce3c4
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0374a084276edcbf5cfd9c258076a875adfe1040
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3409c8c7253e32bd22357145ec7e9e0109da44864cb0992c7b37b877615f8964
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c59ef9202680fb8d6893a1ee50ec3836eeabe109
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:585371acda01a4afe5096c02b5592050dc34e8add1001d0a4eaf7762f30e0288
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..656bc0601ca8bc7c7c55a040f1708aaef115cbb9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3cd87599bd87daac0f2e6b2b3eda9de3700d6b27b79369800ec1414db65fb2ff
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..398b6dc6469be8acc903fef80a6c119535f10c14
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6652d47f80703c2fc076614c76e62791301bbe8047344a5961381b31e3ac2c52
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..77aead691d483071be940ed0f33327fe9d4ff1cc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4166ab7a150692cb2f8b8283e9bf295c3632bd1152fa4f8ba43d2ef65124e7ed
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1f07b78d959ffd88843367c0f0c31eab4a1854b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf2a0ef20774a794c92f210b5e1b78bda893785ced3d94cf1741d03b77ba14f5
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44d727d08db62e82296f1a88f3dd15ca22634758
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a02e202e0bb70b66ca8b7246c00ad250a668804b1c44102ac7f93c33383c5356
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5700d2f63d44988e24836f65b3e9fb62d5e1bf52
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3eb20008a421dcef762e66af9f9e9240f68406221bb261906e03922a969a14b8
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b8d56acbfc870e68caceb964015f0e8e1d21963
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21aac96ac530057a31feba9bfcb13367030256b6ae84465863ce46af4e798ad0
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..963f2bc18fb530870eb299556e78b5da72b9fe60
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02564168fb0c864d9432d8cbf35a8d83bfa243383d82148711f4c2f32f5c51af
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7628c6e8a47d3c584e0d389f0e5cb411378b3e51
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0f40850376a045982e1e6400abb3732079e0e8a05cf64d6abdeb259e9aff93b
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4cf758706a672646ff4de55d9ad2552d020696bb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96512c344545f0831aa47043b8d2a470486a3f1f92c5f454df2152ec9d4dea37
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bd26a6531b841ceb9001cefb0f392cbaf23549b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff4126d83b95e5de37ab6b1dfbdb0bc27d9146b4af253a05d5f4790c0095dc9a
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d1ab17ef1c88c94f1b149263dbd4621de1bf410c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6a6911510884061cc8a0e582222381fdf76411ac5077533687192749e43d58
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9f26fa0a64b73d79731c210aec2735a3af55ac2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d1fa70b424b34f55c63780bdc8f1c5b783251f31b2c86494d30726fe36c01bf
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d6c8caf4c4cfd576f0cea691edbd266218a8bb94
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:284ccb4b81e6e31bfff37de8c467ccd7bd3269cbf94031567b87aaa6b91e0487
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..990bf4b4352b18f5d4ecb43ad319552ea3b722ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5bcf59a362ed35c4edc9febd87db3a72ba9af2cc6c6cc9a8462f0eea4d59b33
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a516b2027a533ba6ca3a53e2d47a3cfd63b6146
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62fe3f081a454003756648f93d6288b5a1180eb0cfdb5d3331c7ca84f240b577
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6033346a73ef653dc12ad537a8f163f59955e3b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce0770b10628d5a7080f08b89b9d6e7eaa56e1d70d6df556787e7d9996689cc4
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ca8eeeb4a894bfa17ab3002658d1a8f21cbcc3b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82c7dc5770d5de0f6be9bd566dec31309533a884f140c67cb6fa6a3c15e1cdbb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..af3642b2add49ca86c50e9af483836526396f786
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee08eb9a94c93d09271a48101015d8aaabf0a1f05d6c9f284c37b20fe6812bdc
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c5f099c03efe475d2602912aeec552f259cfedcf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fe8ad79a81cd08803691371ec8b45e878a1b2b481799dedb11b231d119baf4f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6bf8a113e344891514e28956137a58751524d90d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2fcf95c8650881370ffdb7ea958592066d1106a9bf0d1300090c7d108493aac
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..125270afb461e7442c0893c1db953675eb12fbd3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef8c5343b300c6c134514f3b573ab43aceb75b57033085cda0e14636a47972e7
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ca59171523cf7f2ca3b9d67238920caa0cf37ae8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2f74761272ab534a0b7e60568688bf18edf56d0fca6a7327b37fb66bdb7712
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0cb9952b0e450a8fa9aba12c4bd647ba6eeafbef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fb0a164010be2f31ae718ef6912c258c424409db03e5470b510bec38c73ed3c
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..33a44e25cddab1cf6e38dca08c8d0d0a8cedae0a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d054cb08a7578e17cc75f2bb4d1923e9ce9b3def760b3f3709ee35dea1fd446
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4fe83ca33ac0e35bd7b5948f0fc521fdbdace766
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30b3717cc97604bf85dc6e6af2336926356b2367e3c0414c81bf05e16a645262
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..87801afa9b56bb12734b56f3badac7264bd8ee60
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d112fa5843c90c29e45e34763680ce6694564f3ffc4850bde0507b4484c4a7cf
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e6f457ee257bf0fce0daa4f181a8fa7cc1502bcd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:400049492d9a4fe30a982e2a34c1cf32384b2de0b1f065ff7d1e6e2b7be94b65
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..60ef4877d2c00a46040027c35a648e7a4aca91c6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f50f1840296942f8ec0c891704e96ec589bb0e21421b14af712a2cc41bfc24c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f52ebf63abc03a04a133a3e17a67ba9111e3c5e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:388f1f8671a5d3cf56e313749522a0e311af0d5843361e2b3921cbfdd0b8e479
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2513203df1e3c943221e6abd26c2d194bcfc6610
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70197b355a71847edecc75fe45f9e80ee0bf3a9b57d75fb650f4cbb05f539af9
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..80340561ad3a1b16f83e7f30b426df9818dbf9ec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b88da1c28a7994ef60d2a2fedf15656ef85131ab6874b49aad322b9f51e10dd
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..952d4ed102f86a35df32a317784a20d988c4ade3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2108e5d396d59cd72d49deebaca80e50e99f747e644518882499d34f6ccaf1df
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d258a1f7dd2f274768687c98456183d770aae3a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76a61c11b1e6d47dae582ad5f519eeae75607a66808a0f1adfccdf8f787367bf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5951208db5b64001f0b3726b8c43e5498c18edf4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9d2d7ff440f582fd6b7bc522192d20a5956795deab443dbf3fe0b57c397c1e8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ebd24cb1530653579a87ef9dff9e2ac20668eb30
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eee575540a8c0776168e54c26742ce96ce726f38844060ad45d1b34e4debc1f6
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c02d153717b5cbd43beb7a3120d256aa84d6ff69
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f511138739f68e449c0c0e16870fd7901a0a14335068d2e2d19fe720f878af73
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1af803257408af01469555d34063e50e75ec67ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0eb51218b73483ccbcd1f2b50d687ec5a443fb8e48c60ecfb37e420f2ae304b9
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3a80c73230d94da0746a872d48d66fe14c04efa9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ec1ad060d0f3ba7589015b0d75c984890c5b92292aac0e93e3d1917f3d9523b
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bddedb4721fc1550f3f41321234b8c2ae3569dc0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:622851b11191773e031f3bd9d2ab1d0bef253b76704e5aababeff867ee36b140
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea831952634a90296f6f9b705fd74026fac09f3c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f420561cd979fe3d0aadbc866bd7f25f9efe7a647fd443eed4e32bd81d317118
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5bee6604ceafef38bae15b62a18e5faac6e36724
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b106b51170ced58ed5d9038f1ec0f8ebe12370d71648ddaba85f0fb4291e739
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ff549a20ff79a6bbdf9377cb3414dcd2927c4ce2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5276f935c30485ccef6a01a96337b2facc2ae45c4d3857628174af350dacbce3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..80e79abdc913437954fa0c694806641ac6637f22
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4769aa09a6445980dddcdca55f31ee70f03208870cbfb4be92cdd67e5b742896
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..456f3b29a85057bc1891fe47af29d5828988d77a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee45b19896d996a38be4493155c9d940d5017679bb8ce78dcb090dd3e940811c
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d09193f2fe4c0a9e3703c448024bc985756e6d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bab386b2608bbfe9bbaa27ed5caae236b008e2d57547ac073e80fcfff01b29c2
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c98b736678cdc4f02a697bd0bf10e1d0f874f3e8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d67bbade239a0131c8b97746879b0cba9d84055d981c940035ed2a31794f844c
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a58901a09f0ee26a96891b3d38404a8bb8a49e94
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c74526b5b72a5f8238b4c55937c7b1bf17a118cac455e8ceb350d2dfeecc039
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a5a40ca1843b9fc2f6c82296afd0a3a5f92b4f4b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbaa383ebf177608a1ae56b93e3737316ef94792bb7b933c287f00ce5e404c29
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..31bf94cd8d79197343883ce825c2bdcccf414a14
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70f84eaadb6c35b3b6c00a431341901762091c5e89479aba2476efd87fa55259
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d62a4968514ee95beec129ae1c5478d754bbdb6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b89af2b41b649ba4b10a5e5f2dfdf313b7b795cc2257425c09322334e3001af2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c64077e6b047665dbf778fc5944e406e45fefd26
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:549514665decb7cf78525dea3c807773b3eace14509375aa2baf23a97076fa7a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd83de0b150620d69f9d94ee26bdf5d9a5d78beb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2011c8919e3231feff4c2c3da1263dad3479afc4134f6dbc78927f7a7cd4b2fc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.48.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e0ad5914d69fe879501b1f743d693bb306c0d26
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc772195790030dcc1e0b5ef16524f3ebfefb98701ae7627043f9f9df4549b96
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.49.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f12eddaf2ae053dd5f216c21bfc85b70b9bf1fba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c2713ff83719bc830171b209ac9536289838e6f8f9badcf530fd62daa4f36b0b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.49.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.49.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7c7d33981d3993d25f28c87977a276b15e16bc3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18decebbe7bbf3381d89973735bcc52c657d268528bfdefdfb6e9c0c86866e78
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.49.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.49.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fff27963459e1b24b574985406de7b7884e7cb4f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:260f327b88007629b76e67ae04e1ee6e60ffee7278df18c4acf8fc2b519e3241
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..725728bc467886c3f8b07b12826f4b35334352c7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a33e90041712b0d4c313a0da4a95370d47164c072c8dd1a5b0b6fdb6c63a564
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5810b7c1e1c505c0f7fbee637766c92d3fd89b13
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f313d72f733bbb633073730131bbce3b343e6b77171022d23244d3c4237b3b5
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e316859318e83170ccc1874fb6e108d35f276e9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5e2148e9409a2dbfe838844dc35059e373e1a7e5696d5c0a47b8bb8764ef1c4
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..919750f1b5489054676970e59892be74ff564004
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20b092e2179df4f68a867489dffb623f53503edc7871e021607eeafac23d1264
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0b343562ee25d62771736cecadd1a20e3b2e59ff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:652f606d5d5f7d9984cd43086e979917224910fc72a3fb589847453697c5db3b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c824d2fbdb96cf3df16f6d001487d3fe40b001b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b44c422ad3e795bab4d73b781a4a578b9f178c7c8ac82df62a67679cc45e773a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d34a42eefa88b768765314e33728caab4eb4ae5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c90dd45ffdcdfe16b6ede8f54a5b79961b1979b99ca6e858879135514de08b9e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..67a98e7563c30119f767d1fd6d6a7b93d8a0375a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d1598304e0f679a45838c58bb61d8eca04bdf59ef54c9b5757599e80ce7bd87
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5489c20b394e63663359f420b6671ce5a3251d83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5df8c8eb10104be2f1a9e4d2a05524f5bba01f328157e24d37f9267f70a7e482
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..131bd0b64fde8f022c8c0bfd54795d877c949f61
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:574012a4c8c0b054db657e302739436bed1ae3f61315490335732157d86b3c96
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d77b29759d87537fca6866b0ad702e2d93876145
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f27d2a9cece1eefe8cffbc5126102bc1712fc0b43dadf1b858e2dbef2d4e399
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3650f2c99052060cbb681076eb88e386c959ea65
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a49803b8bb73408e69820726032ce65329f3fea284108a1bdfac150fa02de883
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a0d558e0a24c6629e0155976c7d84d5402a29ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0295894cc7b1115811e6d764e056b17c41065d9042f7d2727905dfd68f6430ae
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3996a3acfb98b6516642e7cc247fafc00cff3f3c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fe9580188d5834a399c172f49b2e39bf92bc4e74f7a70b5f64ad0d21169461e
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b9dc315de12874e0d500e71a0400dfd4a194f25d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e6cfe2d8c0559e537e59cd82c44f33a080b6bfc596b11b848e6a1014fde058a
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..43f9e2b5c4207fde49a7c2f2b35deebfa3088cba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:610fc870865da01bc8756205b1a45db7d629a4c0fbd9fb9bc241338f3bed9285
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d320125b789ed63cbde2aef051735fe5dd9d45a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f7892a9b108922848c10fde4fa4beab2759dcc21b66d3b94e0772a38a15bc0d
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ad6981fe2718ec769c296af5bb903cf2e8957880
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0c8f4752f81302695f9199ba87a0a672f6905ff055427bc9fe312a2079009e0
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8bbca8d000ecea9a4698415b6dfd42c9ced7b656
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5b2c011ee4372443885f72f94a2a75c9283d38a471a664fb2ecd46c9e8d6284
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9916e58a82ed3c2c20d0ebceea554b4f21c71682
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c04bdfaffbd255255409148de50b47e47b964aa491c5fa9a997051b4d005e6e
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c65a03ccc7bb7ac6f276ffc85c907ebba6723f49
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1158dce50a162eb10c645b65c3ba208f42cc4f46977c077a78e64599f60f7a4a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..713bb0ef56f701198915cf3729e9d75e12fed8cd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6de591b829e364c4e182b3ce4bd5a222a0105c2e500dea2d77d8b4583ad428e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..331611adcf88817f45423cbc6d7b0298e2f779ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9947893c0b4ea7a6caaacdc488108dfa3123e48f247c6ad71ec7393dbce0276b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f45fd4b365902b8171b8f59385ba279c8211b697
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58e76cf6e842c0654a66f8e321570a0cf36dc8a2ea374b6c55715af7dc40502c
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..675acd60f46a624a2252ccaadf54b5b0a1bb12ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e10204934fda6b72f3ee0a41c890159a273d9528f721005b1932df4ef25e4e4
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1a684fe9c2c4bc5c13b96b516986f1a9463d755
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e008317507a9240978d8f89b08c06576878ea9c3737232e63ac87586b2b3229
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f78af529430df2921b7fa721003d4da738fb20f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbb341870c94640d25d55874eb6b5eebe43d21bc800e66145c064b5acf5c8791
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5d5c438ff5f9a33e84cfa69d782a47dbcfc4ce1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5df79126f312b80050edfcfad68948901cc55be89d1e3e232c67363982f98d1a
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db16d797634f38885168e756708bea71262c272d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95a1094725e79df03c80bb326e9af3d77f565665617e945c9e31341ab67d3b17
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db99f30408209f1aaf1b379b47c277f7e814d36f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a2c118f6e370955879223681ee8994bef11dac80af94e3d89fdcac914075541
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..30e34f89f7ea4f15d315f7ab1296e6556f593a07
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86642fe2d30edb55e2fea8bb4391c70ad972d5a8220dba0f289a37f56e977d1a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e4628dab5910ce05f929d8ad0650b2d4b6e7f2f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:797a83382b52a65436c8323036aecd33746530d71cd1fb1bbae49a30218d1c68
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..14d39312dec63baaa3d482086ea0c4a3aee05a7a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e74b47dd8f70d8c43269b37646e98ec4ea25b73d3fd348d65bd324b8def18f6f
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..faeefc33bc6deaa5aba22df03a095dd4306d567c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c9ebadf3418d20c76551236bc6f2423d120d1eff5bad90970472b335558f34e
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..572065a765609e537cbd7f83139bf9d2b5736a58
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e0daa63127d0b035f35ad7ec35785a59ed45571a527cc13edad4139db618b19
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f185b0a9391d630b6282442311807067d0e90e8d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1caefdd4d2e14338b1624890053e9a0d8b423b1b6c6d0f8eda4ddc96c45366c1
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eb29e8438da60c5a4eaa8185f427cc2edbfa7610
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:010846e95cd53a66e616493a6820ff55161c51548f48631a78f5f672ef513bb9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1954cc7ac5d3f9b93ef523dd5e42f72d8cf55d2e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6aa40391ee64cdd57e025aca20a3045ec1ef90498f30cdf429c72a77f5d6438f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b453eac6cea4ad2f0c3cca822a595756deb228f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:804eee0be01874924ac986c54ee48c8c96e71ecfaf3285f6a1457bf8660bcdcf
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2d43ca1238b067c8e0c403f69230a56371875ec6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:226b97a48165762a80548d4bfbbb3017717f980cdbd0489ee8b989f1297117aa
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab4774a8c54213c70e4f4cd052659dbc4a9a698e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acecfda9e5d12b77b1601d5dc199060ec4ae182f8eb2aba25ba92d3b423abfce
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aac6e7459c953af3920ac0b07e2abb2357dcb7d1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16eb9f0387a1a0dfe6dfe41e513fc77ddbe26026eca818ef748b1537bba0e697
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4318e73535608f32e18015f0b68700910e7e318c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7fb900322db6a62e72dcc628ca90d9d46244163f5798f140d586c63f379f2b4
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb8d204a8cd64553a1caf910664066bf99fef2b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbf7caa5e92ce9886bc2b8d50a2b4e9af416661904661399678c82be905a91ee
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6581a4cb13a327b7c4674394478841e26d212a3c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b066636bbd6cd98631ef69848c8820a91b479ad4436e6e4851775d3c8e7cac56
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8396793c02a5c979d16b83e1822bbee140938314
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb424a33cb2fb750c1a8fa0e5327231c147863e7642affa4b6e927b19f9c4e22
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..478608d5d89d3ae82b3cb7e40dc3e0fe2cc1025c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b8bc644f77c02cb98c2bbb5ef8f80341e17ab7b9f1576f51ab2cf97dd7dab1d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2bc0a95f44a49313be116b2c80e806cb32109b7d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c0a16da6613c4728808d6d6991e563461468af6ec8a7420730ba9db2723db3a
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a6ce7dac0a5018fceafcbac027e81b4f8fd0e706
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:905218a4cbb42990e934705a6dbf2796ec6817d865c08220b3a7386c77b26411
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c98914d5b42d8cf37af0748327ff93143972109
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1d7b64ba73646e4061cadcccc3cd65a1884262ac1f9d69674d7c2af4a5bfd39
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6fc93ee973ad50361051e761c640dcdff4ee8ba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:537ba46c8cbfa07658105d6d6fa96db7f04ad3cea98d9ba67c89354984f95f08
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3a768e040d738dce1bf6c7869a1c5ad7c4eb842c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81f290e23ba045b12a0dc6510927f8c84b921808d21e1be3a199b5025f0267d3
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5063c0f01a52d29683bc1f1b8e93c0c32da54e22
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6376ae3bb1a5f7fae73c2c7317ec5a1dde64b575937c7a260020f6f838cd8e9
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ffd99d832989910a723eb349a2841d4fd02d473e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0910bf60f4ff4fae5574b179ead92e42c09a112a47050a084bb2b16e699c0b0
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..877bda1e4cfb86badc361fb0c85c81b2b3452f94
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b11b797b54e7f4bacbbecad865d61e5879a726882460b72ef8206a1c060067d7
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72679f87bc60bb43ffaf25f6e301dfd45beaaa39
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f85c0cfbf37e482c4636e333f1b0fe91b0e1124a01e7c223e603b934f1808a6a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.49.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2c758af5f6fa894adabca6f3e5bb663c1779a35
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e3e6f2d2dd7da0adaff41b2bc2ed418ec362f1f12d811ce733718fbdf3b8fca
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.5.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..426f2cf5f7b482cf88981a4abd099154e7414dc3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2f02a94a7f5cf88f6b25fa4bd482863c9f7e2fe33ee833d504d38e68a4ea64e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.5.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.5.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fdaa4f384dbbe9d386054b9b0d6468b5ed44f023
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38639b60ad630effe1dddf42bf85aca33807a3630635969109faab83e2cf18da
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.5.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.5.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..68cfbe4bc4550e3e7152c2e1862b22c31a4a3e32
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ee5e052b750fd67c94cd02fb2492e789f761e4c970f0290c1a37f2034aff789
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..10379d0578ec0418757182dbd1e17cf801cd7115
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6e5ec9b9e9d11505433fff1817e45f8f7dc0e540ad8121ba486e4b214a10721
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb5b4b69487abba14f0218d5dd602c779862e9ed
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:829ab902861748b7a7ae03678fadb9c4fb7226a8276c7e5716ccf5400829c66d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de3914c318b3c2eae3346fd426c144c4468767b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:771be225bc5c6c4b898f8edbcaf8aa1e9014bcf75534b13554ddc0ee3268a948
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..97411ecbacb4a6a7a281bc1853a973107dcb3e23
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea99901b42b7b0de39e770a5dd64bc0022e2983ef890d649e937dc4a5aa87b6c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4891fabf52671ed24b0afa2d7bd0e7d259e2b937
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c8b1187aa90d2c9455c41afa498bfa0447d36828b2c5ecbd3719ec183e63f52
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4944f1adf140b6700b3096f5caa1673e4a48ff4a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:943ed09ccb0b00c03f61b0ce87f00daeb84fe691884bd087ca2b8439b5e9cfe4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2fd20fc0082d760f28b0da068d87dacdc2cc179f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e61f5f666af3a1735f823cd65cde6c182b4a68785d2127228a8ad6253ce00ee1
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..33876e60c5ab59bb0355a2f459f29ff0be7865b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0be6bed71afd5590c045f11f94b94500fcb95f205bbcf177f2e9deb2c6b1954a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f36292e150852d5026809a127bb5836b8ee9b63
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5016b2a16ae8fb092c3a4528951f418cdf00f03eb528688d2fe044f715d3ad2f
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..949322231962414057d683f6c1fe290ebef18b55
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ef0fe4124454f6e62d00e32b8fc0f9d88b7a26313375fd8970ccc5ef7b206f4
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f69699166be74dd58211b63c7aa472485b957cce
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f316398286e14fac05f15941414f57126f0ace0239d15801a5486f30b781dade
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e561f43021d9e50b8fd1d9bdd0a15e57afccaea7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a2b892f71941d6fd940ae8a04e46f76ca18c57dbbe0a7e31070aec3390f7351
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..00d20d9c61e54443ce5c8c40b2c1caf7efccdc4c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fdfb0d933e858280dca60c376419b2f8db60d1466098bb45cbe9c18f50dbeffd
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b4bc5402a07e0ad40b6011376adcd6febbb308db
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96cefa45bf383f0499f52191506cb27de3330cff777ed6d895b6c418645426d7
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16cba51b177fde46f470a9bb48167461b4c2b3ff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da20d46c259e1859ce96332bfcbe4d24a37a033c06aeb44bdcc097b83edc3064
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c6ca33f627db527ecf72a0548b46fd3d0eda43a2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27f56b2abfe8d7262b345e22dfa8da5f27e4660c4eee164a8b3e68342ef27a39
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..277f9d54d984d3640abec432280d4727f6ae921d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d11ac09fae10f2487ed95ec1b25219ae0c8f7741e3e3766eb2c4239caf44ed2
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..30a0a4dd7bf2b24e7e6c7442a8093b421256e9b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fcea5d9db8b9f7f3c8fd47654fd92757f1b06743804e5f5792317f92fe62fb4
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4f4dd85b492a2a62a5b2809fa88e5a18325ed25
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa6682718916a0b0524704c49ab3565e9bad685dfe5b52f5b899d12e789a4a9a
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8d7c8c2fffe9e46ce138802c662d6e583235fcf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04899b963aeaf3a4b738a01823f62fb13cf36733957f58d6c9a0d2f1ebbc197d
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..df62b437c8589dc99048f6ed2ce11a3038212943
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4549aa8b500a1c6d45aaa2d1d121476926e058146fc31a497e818ed1552061b4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d51c79261162e3e10b7d1466fc689d8a382fcc1e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed2f2d95d0b1a4c273b5f52b37ae822c0e37b76c2cface9e8483491f01d5481d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3e1f4efd36b71b08ad7f6cb26d24bf0749105d96
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79919c854b73c089815075d47c922607d108b573568f5fca3de21a4d13112ef9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e7e422f2d65bc3f25b65273eb2b9a515658772f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34a9dcf0a6d47e2a2a26fafb465a517402524035f9fbd02c68fc7387f07aec50
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7051263512fed244881ff4fdcfec4c7ed5d44151
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20abb03614a42db51ce737119e3a0595158f95c2f8fd0189a93bbdfcc41a350d
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b77a8cbd58abcab48d679b9d7bbc577a4de03a16
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddc454ef959c831e86fe82c276ee9e65de5f390b3e25b663a01fb7e4fd4d8a94
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a27aa4008d724bebc8be6b0e42414a00289c0ca9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3cf6aca4a988fd777ee4166fef83f7337bddd03f104b1fb055c2a8400af0c01
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a39c62b1516dda30277ce7f9a280537f120fbb6d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc00e4f188e541f7b956141a86bde289bbad446ce6c8b72e807b4a0ac600bdff
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1fa1b3ae22cf42dc5cd81318d14401183b643cef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de37a65a87d7720d001433c1e2a5060916f12cd4e61833c765074f3c17f92b5a
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..803a48ce6a1aad9aa6a91aaf90ba410304db662b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbba2728cf62261778c658ffa3e9c5655234725baaddf91e33a030db7bee872a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2adad38625a0f1768715e68ce8141539fc3ac880
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbcc2565d4e7c260926bca4ac340717d1ae39029623e3ac4bee0b1a667fc9bbc
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..188418e9c117f214ddb6db33f6dadf5977559e4b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:239d28ac2e5217c418981ba182fd9cb9c19038105139c731566c0c8a3ad47bff
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d1de6f74005490e294b53554d50a6f98a9c27ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b7d4443926f1616dd5e68ff2a60bb92d9813d4c1bb2ac6725463f9b40403bb2
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e2a6fcae8430c12cdbe7a62b451d6df05f067c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45cde1ab4d494918731962b92c520d267fc74cf2f27629b6fc1db4d922f49342
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5adf68a0b043be7e136049004d580b24d19f42e3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:921983d56d3ad9cea386a510fad35a01d7d214ce507e45ffdfbf6805abc8cd57
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..803c92ea3dd384fab2246d8c14f428d0c6862fa6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26f06702c380a28f0e2a208354d5caacb9b769d2de884eaf87ddff3dd6c629a7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..41c380525498dd97ba1f1992b4a229911921723f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efcb3fb91189ecfbaf80717921a615a80aae9f6373cc70b79f2a8979ba5db4d6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..956d3fc264e4fca6d8c0afa2df9bdc5f39918c54
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:038707d0409ae935f3a8769f5381fae674548a648b6b6559daf97cf8d401c22b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c8b8542a9108c725083241465d61793a9a11cb1a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c434fe304cbea2c9abeb810ceccaddeed843091f533b4a64e71d72dd617ae8fe
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d1712bc4ad4ffddfc89860bef4a7bd5495840ab2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d82e76a3f572dba39cb50dcb7f0f371bbc2c3ac64673bcc795584fc6a790b1f
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b2c5bd18e1f092115ca37b28cbe4f80185e4387d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2a58124fce588d051c04492eb9c92258720170d0fa0fa79c2a21b5a78b3ec6d
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6fdabc386806dd35708539fa46d4ef72a43384b6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:687ca187c17254361c2bc2bd328b1bcb4ce4bb85901fbaed7cb78936769f6331
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf1deb1839d90bb55016ac35c35393cb965c0922
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8979add2cf0792efc3f50b9417a1c6dea053c2559b7935a8d23784e2755c8792
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf88f355a57522a225bba780b3f2432e11809b7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38051cf935f8de92f55055a7a5c8df4a0950d0a04289cf52c2f1e5edfcf19019
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7bfc3313788ea760449932b647446b253ad1243
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d7a6609322b1a4522c2d3465826dca231ac9df375755ba0265734d9c6bb6a39
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b1d738e8587809a69a2009b52b32a4fa6080f395
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ad2bb2ee3ca770819211ffb51aebd2493c4cbc1b700f4faba9699bf1fc574e0
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bd408dd48c24ce75d6098d8cdcd8d44663aa7955
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5079e4da15106833dc1431cca8d057d26d6b623f2534a53f4f9a7321a59f584
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7506ea86c083f681fe18e8fb5b36eaeabfa8271f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ed60f562407a5096d035389cb6d41e081c50b69b1b638bc7f822a2015b37d6f
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c0b2e38eb070c0c7f4f7b5807adedc55da9cabea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed9b045003745b010e8fdf8075c9262d13f969231c28f06b2e2851bd006e745b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb03d828d41985b22a6f27612ae6f10b8d177b93
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82eb0650a8277a139658fbe2266ed4f9a12fd703fd22b5d27f967a2ab455fc3c
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20de40d4b84d0ead5d790956d6573850ad940a4a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d6a28a4c7b19a11f1c0f55d9f5574b4ecd9badb1c05271ded3f3aad1c10372c
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..106100a592e4841ff7644e701ce300a1fcb8a7ce
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cb918239023b9e08a7f491ab7b5b74e547658bb5de984ac57ba2fa43af0e8ce
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..07c9bd932a6c07e8b91e67756f6eeae0fb04a5dd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:517538a75956897876c62ee5ab5f3d974812a12251e0d02aca65b2b58e9978e9
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..caa5ab221f91d51259678055439b25a381d1d63a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4fa3810319c77bd0bfc70b1a1c1e0a1e3d1049f29c77f3d109641afd59a2fd5
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9a7e211da6947aa2212b9966f380e47f58f0883
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f70a5020983819b624fac8164c83aff42f76a3091c86c56f7bafd9089542996a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..776a7cb79c5b3350eabe9f73f9bb5f1229b910b3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c79c02e48fbab59a0b92c83b606fd62ed8f5b67b20fa3db90373f956fb9577f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.5.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d9832db5903f51c125a5ca4432208b3ac3ac8d7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83b88b72769c903e1f92f8c08e5bf3d1c854668678331de386e20a1276b91d16
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.50.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f2bc7a20ff499cb860c43705ea83744663d2a77
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54c64deecb9da3ba3cbfd95f7fc06cce033611f1af33c1127a70b03f5f82aee6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.50.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.50.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72e0e752f72517f3360a6beea1d3ff7d229d7ca3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59edba1bafc7b2c9a0b55a16983023a42dd3a464ae2e800dcc3c10aabbb9d7bd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.50.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.50.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f6fa20d37da85fa91d4e0cfe2b00ccfe43582c8e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b6ea908816a8297aa7f659a011de13036d0fc8036ee3a9e14771693a3ace9b7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4198f4805e85111418d91829949981268c45fd16
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1360bc3c357c7dc3c21559e4b6d943e2cecdd8ba449a5e06c2e8867e4b19606
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4b8c03c7d32a88b20e58286a0ba449364d1d3bae
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bdf4a5414576a97ed22f325accfed4a2adafe7a0b1f8751cff2879f6a183b0c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..63978543d1735699fa9e3ab9c852022f03880fd7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31097fb7691ee5cd1bd066885a0c9bdefbf8e71196339bbb5b3c1d03e3b9cd9c
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64b76697549999e2ccff0f019c942532d3584b5f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca4d311003b5aaef1088a3d8983fd73374c11420c44f5562393ed95ab61e9008
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..209e31234b60bdd46fe8c73aa026541d7a45f8f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a21ffa995357f220879a810d6bca675cffd03aae2562e9352a7b4ef96c93c82a
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5cfe31380a7ade2228d5b5bc293c650adf0af0cb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9eb7c80e5c5a8a30c7ab8de47b8d205a67a4c6509dd1b95058004b6ca30abb8
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..019f78657d67d1c10655b1473602d8f0b707402e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2a7f98bbd6ff3d08044ceb11d24aa8e62876f1743a6d5a5d34249e67c94e96c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37e521160da625e715424b99c04305274f1d44b3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62e9e47f2157f51adca0a16b9851a8e74422517da8c4d54104a4e9c4ab453ddc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..952bfa71208f4b97cdee271492d0f83255e3a30b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f430d6414b3b668851e754b12afb967d35ceb0d48948d6a7fe63e370f58f7c93
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d4155167cda53ac45e6b6fefaf82b90c6901cb3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1136f752f53e9fdfcb227623da30a414096c822ff197661b4bd16788d5c40808
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..522be8e08396a7acfa8e55db9fc7e054ef3bb2d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:958b1df6d0103ea6bb9e6aff015aa3af318c55bfbdd6d2cbc3fce9b379c17012
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d93984cadbff3d1a24f87bf9775ddc1d68fbd137
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82693a444cfeaeb775c10a259d43ae902bcd8f491429b63c10505986e426ce39
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2bd29837d323ac480079a84a8b95ae4c30a9d65d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4beab99c2b66adda036ff3247fbc136fe442025d62d542272c2ffb2bfda8a060
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..249f7a3f601814a1f72f7eb62c3748a67f0f7e91
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d5da2a85044838a7d44be57bf1f359de22c8959905824c813dcb428d8867851
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed5d178fbbee6a69921874020ce2d8d4f5ad86d5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81e7f21e9c25e6953874ef93649e574853f181586b4185b79977964c84d6d221
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f332c312ef21d495206d5e34e973028b8ba5770
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd3aa4b282e7f8fcfc7c54bce083ac1637f010337004ca1b9595e2e5f3cbc76d
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a28de672be9637358adcf76fba636029f7f46e4e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20bb64ac56cc6c760b0201529eda1d597b91e96c7982c7da6d9033744fd6e235
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d52d976e0cc99df4332b9ab2adf2d8402b4058b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5813ec1b03d813e6c8c1555e3422f17c55426dae9a2dc1313ce3eec58b76f83
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fb8068f13646e3e6ba94ca2faded528680f3bf40
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a614ffc36881cd46dc7ee2b1fa179cb62972b789d89414268932c693a0602d26
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..017c92b3c81b01b994b1ed8026b6bd64a92bb414
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6d9bd00afadb063c4facacde8713c720202a3844cde78cc2683aa61e7192d34
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..abee2b106f7f05b2be8f7875fb0e6d2d556b13b1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8d00f980db311bcf89217e8ecf9a7fe1ae197cf46e13da9bf7971354196131d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e46bdb05a1c6a2c964a6d94c0e2841473c57f7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe22b13753d96207f603661390a9d7edd7f2b7a48683e57cf01880600daf9c64
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..08693b35ec83242d81dd5d86a78e50359ca8ffaf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a11701b7ad77806206756e657d3950ec8bcbb68ae881ba9647bedd9c2b9e031
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..48c8745c68898909b29ec6f6c9af9773b791d46e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85b20dbbd060029c0f186138f8b50c6c7e9ae423150b7181c470bd92bdb13475
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a4a4b2d527f8167f0d7a33ee271e68961ea2a8f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49d123fe020308d1b5dfd3dd8a5d21f0e7dbea67725b54fd200cdf755fb22720
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5b84bf350900c89f15df4d41fcc2f15d469dd568
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e34992e9d101c7fcd50108714e8bed1fc25c248d1e97f0e56bebb5ecc850a7a
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ddbb9d8d6b76ea6f83e8d78fb50c0b6088d3f2e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51f1cb07e5a40a9b717c11512739a1a32021a05218eef10e27fc4c741eb221e6
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc10a8968c79962320850140adb8fbe6bd9abd12
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f72294b50faf74f255cc70558b140176e49bddc75c53bbd2136e92a4bb951a26
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa07ce0ea9ccc625d379dc46be8b44caae5f8427
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4a0c0302fc7f409a1bd5727e89ec257b17f64bbc3db9bd8936aadb938aca5ac
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..90f6d4f9bb89cde22bf9f5160edcd98e55d3936f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ad08a7d6635cf475684a90cdab6152406f9a12b2560d46f1f750aa4f41f7f9c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d99313db7b9d76b15fcbff73b19234e7d57033d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:885973b65dac4fc0fcc8b868412001fbc255c3df038b8e1c41b77aaf3202950f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e0025aafbf2fcb7495cd4d554dff08c1577503da
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0937f8f9d4b2f02bfd44567b0c9e153f0621903bd51c3d3b45c0d51c47cfa03d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b69eb3fefa74beb9069c129893c421dbf8fcc5df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:308312c6552a0168a0fdf3cd23c3bbeedeb6561dc2f59068cc1d3bf42abddf34
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc3c7b36c69a6c9e91fb7d753e5e5737d2860286
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bd6af1e6ccf7ff52098c97ad05887313183202fb51843e49640fa9670a2fd0e
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7820fb4a534535faedfe39759bbfceb3fa112d4d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2ff707f17c85a7877595daff2a180b6c1a8cf41043bdb98d80ab135d2e649a1
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d2a9d8e8800af1ac91b4ee867c4f5f0621d10c97
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:958198d8b8233c0ed71c97985b7b313bfdf46b6cab1e2034d5009f86c8ff52cf
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4a2fcab926bb2152c4eacad0286c6f4729bb030
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a0d5f24b3ecc769a5da3e60844fcf340380110a595db48dd7b3b996e988e259
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5e57a8dbf68dc88cc0c8ab16d0d1255fa6c38257
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93acdc9ed191a691cc63f60bdacf6e6b6d04a56ece1dcc03179923d49f42098c
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a059f29906fb3968bcb4a0b26948fb07626ba84b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a581477a837a7eeae9b098c0f48e74e2bd94af0c67b3c8ca0d48ee55d8c8446
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73067c590547ac2f63e7ea6897583903ab998a6f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f7fa00c937e8fbad8863f32cb09bb22d4b86ee4835587c85705617b00b1e68c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ae38dbb947173057eafbb790749207a274ce52be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8435186381559534991953525b5c5c5b473af687934c4bd5c8772a1526df84f
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a72faf486225ca4bdbc40c8778ffdf866175fb33
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:397ce826816c57300c1e6527c56e7dc4e8e34fd0b7461be996cf82ef63997533
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..00a3fd43f328e51d52bf21df5e5a04342b501690
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9655372ef1e4d8c1691d74f36721ad7d7606696100da1ad0cafd3b5fd4711018
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d169100786fe0c3ff189b4ca5a69cd9f86d6b684
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7adbd1544d731764d79b5982329e865e85fffcbd182028459c23007b0c0d39cd
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7366b0da4216968bca3f44d20ed734a60dc90045
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51918b21b7f0fb2554bea7a8e32288812dbb1ec106d477d7dfc6857c0cef7463
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4db2aceba46bc9f9390e1ee45c034ef7d488f725
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e13d5abc12129e677b50295292b93a05fede56b7b4da4d032b9149ff49ae8d89
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..58b89cfc58dfcd3a62cd8b012eaa6be35d815ec9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e15ecd704f3bcd15ebc5c26be94abe6a5e5024cd91d74216352c6e45fa8fd5d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0643ad98d83f1ed78eabd195022d6aa4503ab714
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a3b78995884f197e194d56fdba6386555b2b7673e761e0023090d589a53e065
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2fd4ccbf91341f37182fb2e07e758e3610e791c4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba4708ef3b289140ed3e311569beeea5b7798fd5bc7bb380b7c4a2e3051c1d01
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c871d007adfb21f459e42d91f7dcec4ef22bc664
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:938100cf9deb8a3bddc56df109630a4f65fdb8b95931ed876c37af7375d2f843
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f37cbc33a0dc1d9a63553298254e756e5d784de
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30304c912b7309d9f02fc5e9aa76e95c785ef409e7a02bc98101e03f86d62b80
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..019b1d9d18e09a0d826d58df9c1e633ed3942ff5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a884b19253717281c35eb8c64fee2f91b274b29da8d4c598c3661d60d526050d
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..200c2230d6511e07fd67cd7feb547c3c985b35c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0d124433739808317b5d01ff96ee1b1fd4e653353b102e5cdb606944da0cd16
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6737c7189e05057f25f548dd30c2ef45e1fb6ce3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88929032d89c83fe6ea62662f22853b507bf3cc8d798be6c1e6148436b1f86c5
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b60e5340c11f8d72956874bd553f9c112efe5cea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93823191f19a69d1c17b62b5aeed4ca773499398e3a68fc58704a66854710303
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db6c3225fc4095fee890eb2d63c77da4fa187c79
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:250a3cc8c7f153f07b88363b761afd649aa724f13dee8060532ec0d2088e1864
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.50.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..38026753fe1ccf390115f4d0c0af0d5bcc27a15a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73e82450db756ff5ac36facd25571c9e59a6184818c1a842c807721ec25d252b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.51.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0fd106ccd8d479e426a7a7456151bda0c76321f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:919136ca73955285af502052df71f0ae12b7b67ff0ac21ce99f2bf2c6405d6d3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.51.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.51.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f306110e3b916de365d7dfdf4a7a93edb6ebd1e4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5821922bdcc01df83018ee65f3627437f04bbbfdb38afbcece1e1535b8a8d975
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.51.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.51.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d45b82249430e3a69739425bd73633c7e57ba4d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9c0a4d527b4b40abf3f83e6988550abb6948bc8bc62fa2c3720ed708c977d34
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..66dd09c969d9f75a4fabe1bad809dfc74566b9a2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:949278c3a57fd43daffccf65593da8559b181202042db3abd167d152616ba8d9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54a881853558afec394fd407288bbb7bcd0da569
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccf113282d0fab6a0ad47c16d9c85c0812eb5cfaa02f926965cd318a9d734a97
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ac0be4d82ebaf21ffa85ef109ac2cb30631cc7a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2485c177648a8148ec439ce5c0a1c589fcd5ebfc16a94fa94148837f17b334c
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8d06c30b977e3a7cff5bba756be334c769caba6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68dc9509dd7384892f3c3bb3b50f11ecfe905cb5f820a2551a496d24445a724a
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..283df0f617ce715903dfd6eea39768bec439d0aa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59566bdfe89f3cd468050e5e556811ad9b14670d25e34f3c4548860c45fce1a1
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..061312184a94369787a6cafc549c2c9c197fdcfc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3ac8275964b3fa6571b1073f8cc78704eea38e14531a899e7fa1f426b6f5228
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..48f2a436a55f35216fcbed88e4b1fcfaf932db5b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa79ec48b5c3233db046865fdb0be8a31ca74fcac28b39269a3532a1c7a855d3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..75f2f44e148944fe911b41b93f5d40bbf9acba10
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:229125fa70279c56d20dfda8f4e8c81aa1dba909872cfc2b1d9dc15e37b30953
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..440d5324960de7ff6b89fa041625a78f3eddbfd8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b4e37ae83be99cc5c19f262d2871231f466b609aff4e5f3bfb8b768d14cbc01
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c61b5504b9ec709b3a2f7086048daf791829ee30
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5dd05157418a8af9b47999d5232de3e26a83a8d0f29e504262dfd5d1cad03b4
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..84079762948faf5130f18f9ea6725c8ddf37e6dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:695ce0771842bd6f8f3ea37420e7861c81532c504762d6d1a5ab4b9588221a90
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..12a7806603611cbbdedd2afd6df09a9aef5def68
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91c42de0108b67600f9e147f0ced83e77e031a273d48bfcd05c1f961d3d29727
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d051bebe2dbfa52a99737ee0eaa1f69b16d10b7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59239e44eaf5a912c76f1228931542055be7e4574cbd093c663b30f52fc5099e
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..014603459962c62fe916a218a87321f3b2e29e5a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4806d07ac83f2b083f9521117da2ca9b08350dbcb3a8c1cdc50d8697cde08ada
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..63e1cfd58c50864987d43cd2c8bbea2d97fd25a0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ffc0a0912fa8d463ab65b81e30d829ebca4891ee9586cad9861d3d0c1aa7587
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..375fe6f7a75fffb5bd0917ff77692bdefed862d8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed02cb62aa79a3d0dadbc59d41a8038fdb1c4751d6dcb260c188a06b5684607f
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..096691924715ad2617bfa91a8b70dfec49163908
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30f9e0302f2076a0513486bccea40b5d32c3bc7da762eeb557f7cfec6b1bdc73
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d68466bd9b6c10c6a3b0e5ccfbc791de67ce0320
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:145bd5a0535d83f69c8a097f0a301c3ee41656c0ad0f2a165a2ef02fb022945b
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..42167cc276a44921ea8d07faeff6bd1488e31eef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9355038ef5328228f89f04bc047266a28a5b80d9c745c4102bec9e478812eb05
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..84d85ce5328662c5e411af0dd8a5ddc5852da3c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c76268b42d524b70eed29f9c331fa2ab9ac4e8aca1e535d873e167888135f85f
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5bb8f5f14a5227d096842d3648f053368a007c56
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88d7502aea35fc2c5a104163c59ddb7de52354f1e4b6c6e2c9f14460c99414bb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..782440e3ed5d4c8a9a0beb446db18a64a66e9a01
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:905fa4e1d45937619202be898d87db5c00685f19d485ac16cca4ab07c7b857c1
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1365e0033369ed1457c6d95088f52f93501fe601
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddf8155bce00ec38dd8b032c655f37ce3b73ed084c4837e1ec64389f6be221e0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d8db39560dd84b0ceb45102600f1084fae23954
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:732dfd041004d99d731a46243159ed4c7cd272b149324dc7a8be5ba84c946a92
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8884fa6c32e7eae173b0b6aa530a14b91d852bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b12778e43ad1ae3db919ff255be154049b41d4d547c44ff647b2f710857cd19d
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..654d304a592300fafad4defd737a748a31e59250
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ed10c65eca8575ca3e369796089b80f9f704124054b24ab31b6f9190edecf50
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92e0021ec8472cafb8900f4837e1a591ab10f37e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a85b94bbcf96286fa72bbd17f7339186e4ca99d128fb66dbff8df11f7ab6d04
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e9a91d20798d9a3ed9e19a8882df7b60761d1dd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4289589faccb401ca7fc8234c2e0c79e205a846c69a46fe42d6d4ecec98261e3
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..55e11102c367a23053950e0e449c14b339385267
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9a9e4ef5a98781335175033485e1e189f0dd96094277fb495fdbad554bbae68
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e7f2d2ce5ede54f68faa41518ed3942c37576bc9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f73738532e35b7acd199e3e190da0977a5b807102c9cfe2ec0790bc297aa074
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1aecd49920cc673b2c3655a34aa094c445f5c1fa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d6f50a5bd036f1ce1e1b8c63e7e3a97857067a61f839f9250c7052bf40547ca
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..51946ee956e914242bc6b53532e0ed30623bb969
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64edc456e57255f072718739c453d131d3322fce2a376e68c4eb029f87b86c2b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37f54b53ce15a3b7eed0c51e21f010a7b8a0e52b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8e1f9f9be632bc50e76e473ef11ebec0dda8d851415a1a690ee862a09f0edc4
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e1a7ac62f7074265ad66a1025cdece26a5f1509d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:774188e4b2488e3f6461ec9b27d940f6120ec58246dae3539ef222aa6af3b551
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3e40c44d64512b4cc1a208fce2ca8361016f9c3f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34899f3a623b0ebe84984c79d20cde0aca9f7f936fd02cc467a2ca18c367a40a
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b9889ed3158e2fb29f80a1b7993bd52f1d9f48a1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b51fa7a3b13e5ad8857a5ecf2534e232663df1fd07394939f8dc18ab96090045
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab099c2716b9a75c4331e19f520de691c6b51c7b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d80be5f5d282cee61e70ac2ad2436ec03dd1c4c57161fbb4ce01753bca7d0882
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ea8e3c7606bed019b745f3c0d8bfea9f437a5f24
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf11eb0b246fd3fe8d5432c49ad3c9b85f6899ce70e421cc1164663fd478a8bc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dbcaa957d3a6547a9a65af5ee47cf1ec9f5ab01e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0caf62d715d4149f5bdb165396dbce3bcbfb4993e7a20a752fdcc36830a153db
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44b213318247da49c347314d2fc2a9df3807a8b7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7322fbac0327c1b78528757e24eb050a7f563afc9063b37b23e529c831cd2635
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5ad45e0de805401c50ab146486f4aef5d8143167
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74f23e2a9b31a5fbe6666af2ff6d1b55dc09f0a75dd9cff7bc72398591c7bd24
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..943d4c5713c0b355c4dff9a01908a896257d0f12
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bab7ba0201faa0ad5d6888b56f9e5da41cb28b6379a9d09988b0f74852dffe1
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..098ac90f0ef92d5360936b0b3bd69fde74968ab9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e9cd3aee68670b791ae6b2f1072dcafe890bb6774a3ac6b9408d7b4903e8d8e
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e50dc44bbd92e68d211b8f3d57ab1dbbf019469b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14a8049f6b0476bc372bdff827e30834d152a033ed86e085c9e75b8e12bab7ac
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2466260ef3e9876d77bbfa3495e31c1301e914cc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6578ec253a6d9e83d46df117e2c274fc3756e0ab10113967d6be4852a388a63
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bd2327d1129b7e751a83b83656ace526b11aa9f5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab30be4bbe7f7f565e5127f9dbf1a31a865c11f72ca9a04e4025e306d779a45b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cc22d164ad3bb4538a47ffb94e55fd13592bfabd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f99464f12d62911c2433b41514f25c8520202e3e05991e6c5f47611bc768eb53
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a9433f2fa20e412d6f166dc3575f69a5cab7fae1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1070f3fbae673baf264ac94d58d7af80ae964b502b7d43d01a61ff445ef01ebe
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c08e9628cd5d3a91cb980dbacdb461a8fe493be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae1ffe215f47b1df47f45503e1ba67ab8596a656a85dd51f1a69ddd2cb64ea34
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..084f398592b94311dd02fca391100437e01eb960
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a81ff6776e4daec9e7c79553b83ab87dc91e0cd02ba6be6115bd44b18a30eca8
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4bdd1f10a0596f47362c0d8449aaa3fe0b51aed1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab57fdf810cf00a7f36e281f8f683143296e9368e982f386e45572b7b2d9fddd
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d2b2bf796b36c49a49ea769eeeac48993c544e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4039f3c8a1722313481e8332835ebbccfe8b8394c00c80f24559c186d35cb165
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8146193617d6bd3a73c2b8c37ca04aeaa21c94c0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1561d7247ad08305e469a0dffc0a7ac858b47be460956b4a26e8ec0b4094673
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d547a9b5cdf26c547d6be5673afc6dcbc13e9ab8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1569e1a27d0aefba1bfe4242ffa98705356a683bd75cc27485a7f0f0bb882ae
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..49f8aa15244f3e18e3790c2b7ce212cc5b4e192c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:95e68ece5af90cda523e0d564b71e90eb0c2e75e3e35195a07c6fc9785343d07
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d04013f526df21c56b45d7b118247cb80c8e1c6a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06dae18cab1c3872fe6a4df6678f0f3e987fde3cce22414c4cfe0c172b398ca5
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.51.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4c4c88f517ad031110c16f81e320356d1059331a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:850baaf5938695f61ad0fb67263bf0d0925fbb37a806cbda775e1e426639c452
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.52.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9210b3d9afb819ea10af96c2482059b6c92e27b5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c71cc9541546e1c7a38f3052c9466fc4e28c0550ff84867685af846c0c119ea
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.52.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.52.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d81405a34cba67b92be89a189afe035d90934be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d2f083aff6af274e4288cd6b8099685ef335efec5e843684db474d86925b6fe
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.52.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.52.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..951797f4c55d50846da1618ded6678e7c35fb4a3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17c998f53dd020c484071bbdfc33f6c1c25ef6cd288a0d482b50b410050c2312
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a11607d35a9620443e9f726aff89dc7d4a95cee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:227a1907316718bdaa6389aa9d4ee3e6166dfeec93fd4033971d2f28c766072d
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a13fce1f880a78520791b1f50440a68223979310
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:460da63bca8e087a811fcffda637a0b52f7a402377716c5d6c8c2b1c054b96f4
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f46434264ab4b922d9e43278d61bab5da6a60a5f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19d73f20407d2e6e0e3c38dad12a7d914c636784d1689b2c25e53ff3ae98f76c
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..225f3938be5b91fb381404f6884aef2c84ab80e9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87615272ca807fdcb993d012ec185bd3e75f5f6eeae0981f5b9b62b8b6c9d38a
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a24dcb9f2679c8ed5f7cbcfce10e220508e6bae2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76997673e61996e29f59804199574c032067a931d2bc8e087926ca0aecff4878
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e2db898c874a04b820135ab169de17a5535a2eaa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d490014c9277db98edcf39f5f8d6bdf5684a8dbe887df376ddc0720ab9590122
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..07cbe96a968b0d7da8d145adf7ba99fc145d067a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6c52e6c1d4357e7cfade7a3eee6ead9fd428bf457bb7abdcd803e7e76c05798
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22f7eec78baf9081994b5d41506ff20d02634676
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:134ea445e4dc0129528efe934463f59284af510a33cd57bcb6fc3f3f1edb2d3e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..849549c89ae1e32768dec3b871f0f1017888bd83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25d248584903290346cd59ed76494fc4c190b5f99e0bdbf7e631ce3c06215c8a
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5678ce64c6cd72f4ce8fd5102339a3e7bbc3a836
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d2f2b65ba5b035e6ba201008313ca91da1e6ce7c4561bc2a382991bd76c260b
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a0cae5d0b3d13b4cc2898411a572a6129f6019e3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d81175ebdea7cfaac63ac8b881778604613337e2e1dd95239a8c2974176462cf
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..10206b43966278a1d20081c2bbe45b8ec879da1e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:022d8431ef85c0c276cbe4a96794e99eae979d9c520a21880eb45c9284e60946
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a2cbaa7bd8b69a4795dc1be6d4956020a465d19c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19c916acce2f5a60f73fe86a9324149e4f190d29e3201a22a03fb78cfa6449bb
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f28af9c67c925c190ebd57dfbc35f695d849f598
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a57380528eab10cb8106238bae4942bcdc07ac71e89cd43dab3b5b81c65167f
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c7cd1568c38ee9b84060f18246284834a26a386
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:747ae4aa4b3ce9e4f2e177df3ccebc5513b1701936c12a58309bdf5dc1ecbb08
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fabab88f7222cf721964263fdad4b0361c02fac2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8889742df822ddab980d3325b49c2ee9b2eb1634fb0495f41fd7bc3406090b8
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3656a3b021183b224547c73772d9f5b88a570425
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2cce3bf3fcd4dcd990f59dddc887dca3b977714f94f64232dbb8f39bb797bf0
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f60d088e2776dbca1f29e08ce93c2e9160837f04
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf838ba03c7706da07ca8b7a5d3195186e39f3f6599d109c01cd620fab777bd6
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..edb47fabf1ef0bfc8cbbab2618142f17ec1b1da4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3b323bf70b7a8eaa9f55ea7d0f658525eae47b36a71aa858ce70a095a538b3e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8848100ad48a4f034566df4553b48f07c6e49641
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8c07b19552bbfdd87bd397d3417f7632d2ffdbceeae8e856f30c38511f357d6
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cddb21b31dd0d2b93b8cbf795f60427f1b245f94
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1dc622cc607730eb674b799cff9bec09f6a2950239e278f1e9a2049607a799e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f15ac4b1434e399460819a2bc6915a04f7af29d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e7dcc000c8bf0985b781d78281300a74dbcc36fc3d8151b19b61c4060f6d30a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4009a3346a8ed60dcfec233f0c365064e59221a3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57780e4cd625d523a727867bff9af4127974df40fedd7c52286a1452f884aa45
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5a579accb5baca151d32db553b8d824c9f13cb3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f1eeb8ea2b32e9311337086308745978e22e0acbdd0374661bc1c09320511bc
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d5f820da9de4adaf9ca9eb81b7bb24bf257b28be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc2c77be5d9854c8eb8c3a68cb9fcdd704e3d28ee2a11f102c2c9f7b634dc330
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0aac975e95b74cceb05dbf10567cde3c8dcd47e1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d0bf6daedf09abe78937f19868fa8e29dcd0fb34ed9affb0f474c5e0c7cf48a
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..78e81fffe5c54d0d9fa1b7cb8d14d58fb8ac4f96
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b59ad39c9b1fe6af02b2ed0adddfbfb8837d55d0d180996a4259e83a4f0fb250
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d427981f917a3a2b8c1a2f77706662be3faafb8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:594c678624e563f8f713f3373eb3916aeed1ca008c81e7cad0419368572a312f
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5138077815b536d49e19f4fa3a9f036ec440e136
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:920033e4530b782ef3f14e3dec9b94c1c57b7fe99f78d9d108b4311101b3633e
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bb3ea7c0b343f6591aa2f1f7f26fd12825748a31
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aede3606ea9752d8089c7555614dd592409b31f6f7aab5d242c9e4dc442bb438
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a33bbf7396b8a941e3990fa7c9d8da365c5858a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ac7fd625952fc0706a20f9ca63316b687ea3b6136c24fd5101e6664e3e75bcb
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7669959454755c0b3cc01178a471d3cdb24aa8bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d88f3c0da3da817a7e550b6b19306c5beec1623ce802945782e4440683e0bdb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c7acb74b23c7805859042635dc38ebdb3cc8dd56
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebf7ec140ddb2bb274e89799a2b08d2f18bf7034cd1655e5e4a09e61f97a3e7b
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9795f431ae6aa5356171cca86cea324c8b31fed8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39e3d99cea2f48e214176913721c35213ddea1bc8b85a02ea2ec07cbc0ba76e9
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..00f46429e804066560d7161b17ff279be6bf2710
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09785b1f92f9c77d85e34c6a36a17823e749be68346ddf36759d77edb57d12a5
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1dac9e225d7a8bf34a16de344c4037d68ad8b0c0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:416a359add981cabd692fdfdf286f60d7fe904d3b05132ae0dd2c5a2cf140d02
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d8ccda1e47ef0cb8f7e7399ed8c4029beee8d2f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c7d9b8e78d210de5ebc4516c599cf64881a4df9702ddddd45a7f83add6cd085
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..16b4e4f76a6a246bf1ebb89de31b1b37770ce833
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69d6e426017f90f43fc3fe42e92e1d6577c71e2d21689217fc20784a9714ec32
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d9093bef676e6b0663bd327aed22506606c998cb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43c0b6b2d2c903a8f647cb3b57f0d2bcf80e7ed57628e631e0273c8fbb61e462
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f566f2af019839861f34f7dd94f7f6f1f4dd9e10
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd0874e70b3de0b2c8e7708b4d0acbd44de4fb869a88de7bcac5570a6198d5f3
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5222b7d34a0af96553e6db0e05a5bdb5d5d290cb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c743ce1017e8fe3d7aaadf872212ed2230c8f864e33acc75f02d743a275800bf
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c712a025066810fa91091c58b953590cdcda9b42
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:080c7566400f8e01e934a903eb1a09205c2ac22fc2e0668ef195fd8273e78556
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..10fccded0cff75d120098bd3b1c0ee6e927471ab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b1f5e04f7340a22c78b217bd7d4418928dd6c47fdd1a61cf02ed6f0764a8513
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ef17a1aa0619626470e7a5eca8c990669a0fc006
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42d90655aa4dc7f5e269e5a9ed9c7c7699bdcf03e6c7cfdf295367164a828c04
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b4a307201f0eab2f7ae6a7f81747993dda2adc9c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5dfa005ddd6383594cf77b7e65020ccd088c4ad5a2f81061bb1bfea453013232
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c493ca6715e08337d29cd0f0522ac485d19853a4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b02284f839ac55b5c9f2159aa8c2473fce0c6edff8939c3146507d8cd7046e4a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8a3327c2ed82f598ce474cf56a2a96f33f364916
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8678994e8d77d50fd9249c09934c922d9bd142d0b560e9885d6c509e8a72523f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39ef0dfe9cb963ffccdc7c5837695dad1ef84cb2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45718489e3ba0ebaf0571205cdbd8a94786bcc29cc8979b42399718cc53cb47c
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9786c6b9af8663217a9fecf96f97a6b97f266a66
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a54643bcfa0233e6e50676909d4484b33d915c7604f462c69f0caf24dccd800
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc24692c8edc0556ba18e962c53978ab0fb7a9d4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e606ee8443883f141723a79a31d0d626cf4f2ba396141a9787725bfcfc4927a6
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..759771e3be4c5a72715137bc62fde5dd8031b17f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b7a0ec8325f0f98352246d52e120f0b735606c2b0465bbb112a163b7f831a86
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ed6a39b21ed5c3ea5e01f2d022b59601780a067
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad3696a38827bc5aa34709ea88cf32ac63ce75807f5286c77ae3b5b33557ba11
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..01590400999a5c80923786e03176bcee0cc8ac31
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e182abc3e79e97da991ea7680e014b9c4f954fb4027e2949cfa88114e8aa1eb
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..83e7721b01f3caf16e38f89cc0d1fc474c38b09f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37f939c45368bb64c72fe32f97f8f8af3edadaaf9c17012e64a1d9285dc11d4e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa2ad7d43a613e8fcdaa4b67cd70682753f30523
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8091a00d967e4cedd0f219fcef5391d6f1e4379a1e83007659bf6923827c1cf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8c6a0b7aeb55fe3b1073e063beac4145dfab391
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afba3160e29e7e45428c15e2be7e31b80c8bdc2d516d991d35b2116e5a6783ab
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.52.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..88da0592a571991e43893be9cca40dd1aab3d1ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23574189ffa4267de734c9222da0cc8aa7a0cb28d536bf2234ccc39fe0c5aa0e
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.53.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3a359c9e484385c40d648e0082b7b6e51c5e9f5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c8775f84099618189d9b68794f0b86639cc5740b5d164696f8f444e357cc1df
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.53.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.53.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..05bcfc7cd8fb682fe3a08feb21d79907d99038cb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2281b357c919fb2039603e0d712fd0ccca4b6ef814b104535940ec82fb3c2c92
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.53.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.53.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e869cab9d5dd9ad4e4c715221aa840ba80e1e0b6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2af88e32447d94da071a3ada52b01bcfaa0c5db5a4a56417837a0110a49abd24
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b974fdb5500989f11c870d33ca705031cde6c79
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b62896a7eb3a47c68d649c359d664a9247b36296b23b950aaf1c99a86439bf3f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2c37d1745b55878dde60a0f21b3c455c2211c1d7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6e00407e5c42b2e44a08a98e742f4f9a7ba2e169e6218ecceb4d0209cf1f9d8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d9668828fc4b156468fb8bc82c58adc03a27ebc4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dddaab49039b9df048acda55eb81e1479fa66235aea8dc26edd4fc491ba388e
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8c39d41d27a883e2d0be28e2e1dd1909f52d3c68
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d40270ce25c5769c54db76c80b3f5d16fa7d1bee554dab6e012e5724db493ca3
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e2a5ea671d3d9fc762f05541e9e6198ad9ec27a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e069ceda65e3ac65297a32dacbefb4152f90faf71a06e5f986ce382c332c28b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6708eff691e9a52c1e78accea326f7f131352939
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08aad44ed938423264adfbc8791a8a767e1520644fd4886e6f2c99643f7298ab
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..523e777d0200fb22e49e2707a55d979f321a8006
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b642544047f534810959e5f584e8f4d4abad83cd2ef84a73a772a0edc4dc29eb
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b984dffa9a27c85b1c21991b7a2e06c80a8c94dc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:470e6bfbe169764ccecdb3bc6cb47df329838eb29cd8faf942588281f6656040
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e835faec7986eec1b61a646ef95d9835d60c1011
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa021612065784d924e3aac39643d362308ba3dd20707871d31938660df311d3
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1cf601fff09162aaa6f6b08d6be6e77c6910d6d2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab03c3960f7bb06655e76845207453abfb5ad1a40f95f7ca3a585779c3164600
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..de0cc5fce4deeb29e1fed0d84df2c120cb2468a7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79567d1558809740a99855179c43e83a09728ff07d4f32e03d40de22b063b038
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6508469ca2bc0c84549b92fccda2faa2200033ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b0e0e556ba0a885e83df0475a4e1ccddc285694853e22fc88c1cc30dcad82f7
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..934e5f21e2c3aeb93051ad8fa0a2b99403bba185
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:966e61d3ffcd42d625ad77cc83dd49486faae3580da55908a9ccae241b5838e3
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6e9c9a724379e56e872201585ec1214588c864ab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b48155c71ff7b575598b02ebb7fc212296def21f8ea78cc01f46e1cc1a1b1e06
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2e858c49e5e737362bb387ee34f05ad861dd9fe2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:574cdee6af155a2924c77a3206296b2dca1ac8ca43de1d7ddabf3cdcbb536326
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73b486552645b7e372eaf39335576a39ebafdbd1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9b114a52bbc68c950dc29b05e5f34ea03d51f3fb2e3f9628399ae2d0bbdda7e
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..97e3dac789741911c5ff0f172b266d0647086109
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6057643e4641ff8d1b4165d7ac0bff4477bef6f20ffd7f300c079d838c35a06
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a5fb57535eea5678f83918ab94ff3ed8536f722
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31bca3566b85f244c4362d2c919cfa92911aa08bfad5bbdcba7d403d637a12de
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7503ec48b81c6d63892ab52a15fbc7530e0b414
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e10b37c7117015aba0b145f7f9d6f5a70aef8a35fe93dd3d9f866df74422b4c
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e7241e7dc935668c12a536fc9c7e6cc6b301d971
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94f823f504e4d61cbbc6c46add9e7493a0b9a2e5e412908cb52bbcaca7435f53
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..597337f902445d6e457fd895b1f50ff27d7a3056
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8c44722a3d97b7db0d4fdaeed67f9de878384e49187427c1212e4c15a45d350
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1b2ecabd8f09c32f4857dafa4689a3679ba83b84
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9eb59ca43b6e19092c37876d82d7e5fa85c0e5c4ee7d970d5b6af50839524ffc
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..afda5f2d4e3d3b4e0a9f148c2da9fafa545c1940
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f1be02f2a321ae83dcef94e9ff9b7e2ac40451ce73c32b568e6886b852ebf1f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f4970b33e168f0c37a4201f5b457675428db764
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69f75882f9c4ab8c889350044c69227dd102fa82e5955d91645e34e9eaf4fb92
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c86c6abcfbe226e9243fdbfd977836bf37422715
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d194e323a6ef274ebb6819e7a24f1455e5bd89918c3c384ac07fdf4f3f391f
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e07912f76ee13bd901e88614ecc8255a69f66ea4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e58c79a7a71a7079e4b5712ed343e67131af9db60ae4efbc9aa59d748be9e5e
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ac6cbebaf7948f97188b2cf545f36a5ec60e7e7b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2815e309b6d4a83792214dbfd261612d80c926592dc497b291db3ff97833b20
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8509b3e05816726e0748e873b93bf60818685106
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8f7d35d88f30aeb7d337f942e8232de6b37335ae3d0c97f673e957f4ee28a07
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a9d59ecd8de60083f34d1eebc2f3951dbf8c0a5b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e3a7eff939467ac396a06fbe19a072d1cfbbe397a4f3226b1b45ed7f47b782c
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9f438f8fa0ed8776071bf41ea37b5548c2ee250a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b26dcf43874cc24ad2b67933afb61e2522ebb3239165622ffc905314abcad52
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ee331aeb783aa52ad21bccca5b999eb2a04797df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1521864934e3edc9db52609df0e93afe7ec0ccf5c54373a22e164f161fe1d0f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f0ba0c7425dfc1dd3bdd9cfcfc4b417605f72e3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f51fa6d7b47e66d6fc540474368b080aea78de91ecb345b2137813d7ad1fd8f
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5eb37e47aabfe8a744f17977527c9610775474df
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db81dabcd59a6b35f9dd493550634e9314d70d272dab3f96aefe726c4a1ee6ff
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3789ad90cbece43ebcb08944db2c2b14eac441ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:468b39dd410ffd792ce271a392486307b1dd084c437aab9bf6ee754601bef3f9
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d1643525a8c04fd7dac18034d67b6873eddefbc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f97ccfbcc0690262cb7ec255e357572a4a5982cba792c9400d14423890c646d
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..94c05e7c63ff5fc6ae43d8e716e80577ca2f3d40
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1e9b456567db80de4d21952a3aa2cdd645cf47c5832325634c2e28d1ae6186d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..94b35a1aba8a43c09bb6a31e339550c15b96113a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e2ee23296120e29bf1bca5b07148c40d937b187f5facaba1dbe8bde3d587003
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..42a130b78f6bcdd314cef8b95a59e6ead48ad6a9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1be85405ff3ee26f5cff1c09d0b7dff6ff61af34f6fb5c616a04ccdaff1c4ea7
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fe7060d79dc755db881329ec6279da0c248af51f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36b864b72e328c16f27f2a74170d43f5edb87e78561b7f42b4e62340ccd13b9d
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b8b85dfa1e55e5da6cb1bc74c2e03bb6e9175938
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b871481d3b5cbbef6455eca4231b1c1654fa2a34750589da5ea418bbae3048e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bcb710f9a9a85515882ad70886cafa43e75eee40
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:116d8bfc658ff6df7e0172f751dbd12448c290228b9543cfa2f6c95ff7ebe5f1
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..79d195f1a6c2f935b756a8968973d5c5bcb36da7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1987bb1c414d03676689a31d26721d63910fd0d53b55ae1de9a9daa8e485e711
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9e7905c6f7de40e2ec61ce2d775098b0119b475
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6848d134355a8db562fb45572220efa3b299ed8ba487519bcad52b69d2f15343
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..98b90bf004d1e38a6c1a3fdca9e8da4738748853
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0f75f55ccb88f53215614ff88710cd222fc320718286bf032664e9cf9c42d65
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..850976d86e168bfdf9225203aeb8fb1bb719753d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e70723b1a76745ae9b22d8ce05c60f3bfbfa53cac4e4d261ea7c31a100a7513b
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cf9185eccb5112e57061f9c1bbea089ac5f2cc87
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eba47d1589b486419c1c80e9dba4c1e5af920e8e236b2c450735c23c98f25272
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44693cfd03970492c1876450dac25cdbe6c92b93
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a5fa19a3681da1995ce2971a86f1014007949c8578ce3d5392ac56b2bf49c90
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..72cf994b20856e62b98fd77e92d662baa092c349
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52ee15a57fcbbe84cfe51787aec54a0906bfbb101c666981e0f2dd509251e058
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70d12b1ab16660b163b548935e5719c76e75265b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2456c16bed2dc7cc57a77a646beb999956c3c14aa4da799feee576e7905ba811
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d03852115ebbac8ba006520f49503348a699599c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf50de29e90e84e72c3ea301183886f41c6909d9834506045facf409a2ffa83a
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..befeb79da7e685c5182e612c5ec84bfa5a5e0730
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:800566c12f6abab82eab2b04ee4c4e36491460c2bfce7f327cb246221d58da2c
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..edfee7756e48f73e4ac1c079e6c3aae96f964686
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffd17b5714a1203b4f743cdc0d75443e196d01422944b237a5d94c05cb265b93
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3c9faab74d17a54e5733716fddabfdf1486854b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:739fe072764fbf5b21f8f10b684998c4bd8d30f58cc58b29ef55cad77885647c
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..580255d4fd9c822f328b727f4015a094011f63a8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0934a7d9f39b0d605023ab97bca729c407d10bb330d977c71454dec28437a69c
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..693e2b6b97b812d6525fa107c2cfa27f16ff03b4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7c83881c4a8108128e2d20595875d5626b4157fae58723b9e2415532a2cde10
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..919cc256fae38d1fc6278aa13cdc9e04ac619ec9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f975182d7ca1223ba4d1f54de7516aa7fef8ebeb5a183582e63b5459cc0f50b9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.53.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..422c4d2c117f909104d92c93c0ed31b4efa01e61
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d5fb901cc3848822fe8ef9f7034873fc4dc1d0ae4d2083054257d3096abd4eb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.54.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..673a3e9818594fb1ed3d1c97127824f52cbd694e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b36d56a447caa7656ecb21fe02a5a87776d7ab6e36486f93e136b8de5d3b04c
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.54.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.54.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20d286b210ce672921a050346465d4b356ebdb25
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07357755fde1e0bf7daf8822a0135d00cecba908f4d708bf6e1f02fb63b301e0
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.54.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.54.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..983d857d4d470990a5a950d4dfa52fafe65bcb48
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6f318e906493c7423a5e2281a44641fb971a8bc74d4c8ddd6f1be8934f09632
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..64b37a9a7e8eb0fd8591e1bfa4c83ebdf210b982
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5669343a746213161bf21b81be21afa3ec1bffee89bf46e1e1a7c6a0cd16f7dd
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c770b6971d10822ca630e90aa8cbe28bbee91c13
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1673a6230fe30dc368d95e304b69dd0800009f278c7c000bfd88f035c06517e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d1e6e42acdf46e8262eb8876270e1f622f186771
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8b51de8d624dbfe663ba4ffa956483f55558b07d0a53cfa50f919fdec08045f
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..94419e950eff025babb6917c5e218e60c1e7318a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8093efda68bbe3891254b95c0ef0b8cc4d09213a2ab2d7c0bb37a7fde4c6ca5f
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..90a4f5a3ac9d17b515c51affb7821c1dd4b1c2ff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:996f1960da98a1b51c613fa1753f13e1e9abec56f903daa955dfca1c3cd1c108
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e831fe80ead89898f70e53b548c759c9a20b8198
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65b0cd8d004b4943dc9bd3a5c9c3d4adec28b61ade541cdc5face785cd8b95b0
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1382c1393b0f1b30f1abdb8d77f34fa1b2005a04
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3baed5faa812cf5d7ca8b012ae31d85b953954fd059952e73e9ebf9e10d5035a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bed33aac419c7b3ff8776cb1f898725dabcaca4b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7eb797bc30bb03d93517724ced2c6deabd80b318767a91dbcdbd4d961f9e9b0b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..895859c09954b0db4e71e44d9270062dc6fc6dfe
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64e76d4f420c4ffc5b3fcb3f06d6f2c2c7ce0596278b26e137f8b261e8c0ab46
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f20680ed38f281d5eda8d81dba138745a16e390
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:281bc1759f223c81ed59a7eb7539114815038bd55282e492c6619d22a183d851
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bfcf4d623e4b400113a069ecd30fb6b95542034f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b8c20676bb4c0eaddd135ee5791c58afedd612059f74dd3620234c5bf49f169
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ce4015a92f5cf3400c41ada893bb4c21b598e7c0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:046fa813e27f94c00d22e55e5d56f7cbf9183cd5924687a8c2533b1801541062
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3664fd30cc2e76ee401c739b025f0a2ac12c12bc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b096910594d553fb693dccb12729c31565190b301bb774dfbc7892603b388b9
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8fca4fbe98ee75ea03c2574991b3ea3bd28966a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ca70628183cf63cce176fc698ceef326cc08ebe6ca1394adf3d53c180af73a7
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..553f13ac3e91ba63b3e7370c7bd993471a23e455
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9385fb260a16afd41e10eb59b9c2e3177f34b813ab59ea1c9ec36a554157a991
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..71cecb67eb9ad3a8b1b51eef52ee0706960a7990
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:545594c28934a64cfd9723a485b1cc071f3232b82ec60848463e53094c1f93a8
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8fefaa2fa1017b5b9c2d880ceb368ad6ad0d2902
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7a33822d692e478ca1d868c5e6d9b4dcce8bf9dd9a8cb27358e6567f3cd2767
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0760bc0ab09cd78c90303ed2a04eb00c1496b872
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27a62f5b0806c8e8ab4dd3638c4b0374c50b06313d1303e30b808d443c031d33
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..619eb64fb6e975b920e7da2f625a0925eee0233e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ce550f9e5a9f502c3ed82f17a8ea0a007c7ca5c3be07cf121c5626ffdfc1fc0
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..77e98de8023cc4951ac56d49376b463942d3d055
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:670fb77187801d8f1e78be42976e874fa319ebaecb823f6a1234f3b361c2df44
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..891702150b54f3d8a735451c7da5216fcad6a6e0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4442170cbf0ac4931fa610b6acdbb93467973a8eae17b50927f0c3dcb02de29f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d317a3a5d9489315d453c611d5f1c4b2b29830f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9728c8e152c95cec1ef344246f842ce4c77885456ae989468bb8a240b5189c5b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d22c670c3294afcb79467bed8de11a83b26f13b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1567d4c5be5627420d3ca607246e558ebdaa0a1bf738a522734aeb156c38047e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d2df314e7cc696133c12db93aef18a363999e217
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a061c5fd67397232895b2356f20584450aef53bac5039b7964c791943ca2e142
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..880c0086dc15cf06c01e7f1e4fede9eb56b7ef3e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5cb90de9af5834b5df2288b90317af58d9e03d7c32a8c7e5256c3f6a5f1be663
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f3e6c4f39ad5a83086c8144a3697920f488f8e60
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04cf120d74a6930e3f598eb1f3d8f8601f65754dbd5c11d6e19ea0c7dac416fa
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6987a96592fb9c9d9afdb2ec5cba0e4ff1ec0ec4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:befbf4e1698e783c10de6179638d974f09f4de5f39788dcbf63f58af1497c232
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..20cd0a6585654bfd94b6ec5654587156609f5023
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69c6cdb93125c7167ea31dc37f858ec34ff4a204de8d9e1fdaea310326bb6e1b
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1fc13f43a38147208866913f9e4979ef3565d8af
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d95df74c2d5b612828b1f4d9a025119b9b01ebea7ac874d812d619c0974ecc2
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7f98cd014382bbbc496109d4e977c1dab4076404
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c46e50858d0f3bbe08a20d67a52d9e1b160f0776aafc5f5ec937ec19de1cd6eb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..19fbb03a878cdb48ebc426642e21d33eb9832b56
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d341776aac5ba5c896320e235067b876608c06d5d50aff23dd74227161f49fdd
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4b2c25b87ca466d412fae74c4ebcea83582e365f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:101fe43c8cb6136aaf744894fa43f520751d032aacb752f0cd19a091a027e278
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c1ab6dc64b91d511a4dae3c4587ed19a4910f666
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13483d4772032d049a25a6c98f859d805a582418f8adc5674dd7d34c3d91a1f8
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..54fc6ebdd43c11936c80bcf2a8e8bff90ca75e5f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d2ba480dc3c23f7b271bf6dd8eb8642af249bbde7a1950774f0a2898525093e
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..311b05a0fa4bf85e4844b0d3bc26224205e8949e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0aa1c4c7df45651afcdaa3af0232f2fdb5cb81bd8533ede6ea2c0f804880fa6
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3134cdece8fd3e0bd3adc8fb5c6ea278e5d1c48a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:986ba942c27132227ed55ecc82288b8fa5ac7ba7c16c50f3a70c7dc7e19fbd5a
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d2537a6d91bfbc68ff5538055fa7f81e8ae77da
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df927ac0c01f7206464d3d81f4d91b67585f67e37c6765e939cea8f26e08e536
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9dc2fc9c0f51974902fa08608f99aadfd207648a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bff84df06f970ea976f7090a879a47dc8456d9d9fcc03f90c4e3f0856a816f0b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..001ea72f76fe77a0574eecc0b4869188e0f781f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eaaa8388bde13a9176fa0b5bf8dc819d1dd6c2defded1661ca187a91aaa49903
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b4e121474298a74b6c69340e8cc46be523d17e5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:989f4be9e9ebc8bf1220e18ef23ea0af189a6176be6ece0a2e8229f874e54d06
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eae57b47f87ff8f240e758942a66ee316c5ce896
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:433c38069aaf650a057e42085a72ad493b41d4f02c160e445c5510110cb4c5ff
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d941d9e2e3e940bfcba1f82a4420cb342fd0ed3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:117e57a7bf2bb86c5ac9ed300a2471d7127255e59d283885d18aed499c33aac9
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..210fa1c082b5f51e8885c03bc981bc69ae147dda
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39bb41e103587ec3e934fe6a877c43c8e00911e2389224c1e96c2fc41cefa623
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1063145f67653298fe098313f099edf5e3a552c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcef6f18784ae0113caf7ab5fcf25a2aa15aa094deddb9fc33a5bb10504bdd39
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2584214bc42c95f3e59c6560585f85d22ba11f5d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9f8a5d6d18ef71f454dca066dc1be9ad0bfa14fd7ccf0ee4bb87adaec677788
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..033362fb32b6af19b8c024d856ef472145b1dfa1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:179a0354b8f00374e7ea0d393483b38dbaa1e26b1e3e9bb3ddc3cfb520d7dcdf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73d03b40b27139151d21731477c8682af51e7262
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62c90d82bb9a7031df5b29e6ff413e2fbbed9c11daa4a4a7b340018e03f4bcbc
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c9dfc06c246d78d961a243fa4c3584b2f59612d8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f598b5d75cc32f051f1142196c8b1632ec257e6b9f70f8e5ec5ddc4940fc1e0
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ab9daaf6e69260cb390054fcf63dec8fcbfcd57e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f7c082d975d87e303ce4dcd219355feedc7edf4c863cdd2634eee4fb4bd7fec
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ebc14bc0af2a0dba7919b34c131b15b92f100cc7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f9abb64cc85f5b3d66971c2dcbfc1b778ebaba2c8ee5275e8105d40d17179ff
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0e1a9ff00ac304e29b51650b456db22a315c7bc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea308ca507c33584b213e9bd39274d43637287f70a9ab00ccaf4abf6da0697a8
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..afcf25392021ef85e545c31ea4d8ab52c1351b5a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa97cb1dde7bb06bdeda9cdaa1bf2d861e18f04d7ca88b1e83e08a666571ae56
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8edde1216737cf6d70c8c3a2c0e4d841f4b249dd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02f30244eec391cb6ba62b4bcfe65360be8cd30ec8061a16ebcb214df995f194
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e81c54d7fbd0e7eb601f6f717aadb39a2eb22694
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0e2f8dac8b666cebba38aed39f72c6a02f1278acfa57a89acd415a2079b30af9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1c55ee4736b625315dbbd15c537055d51cc22ca8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3138878c664e9e83696560f9424481ecbcfbe28485962bdee8d461b5687809b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e4381a7fe53a127d16547db728e520b99be9091c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f74a5c487a09ff97fb4c943a665200337f3820c23743032821e49a65e44d9c1a
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.54.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d13a1c1dae6c0a0852c310fff45c3c7d364be16
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:704dc8e853fc9743a80c8c1016bc1e30c12ea9fbbe2f028d25dd30b674f23d3d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.55.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9121908da9a12663962fb8e35d729b9681a1e161
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9826e8dac450e6ced954171873ec3f4e1608d36e28df91be8cb1b237d69b1036
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.55.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.55.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b7ae832460d4e6618c054f2f924e3f7728049155
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8d9f75c92458b42f8cc10bbb23cbc465a52510eb11799f1e23bd55b0d738d8b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.55.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.55.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..df1e17a3a1f56c403c10682fd7761b4f595f586e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f502da201058b4af5b3dccfaf985899b626b442e1f30a5e79f02fb468273cd08
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0ded2b5c28143cb7f7deebada95355cda431cf33
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:379bed396d8fe4ba3d6b84991b4eaa905e6fa8fe4f378a8ea9a3367955f28a73
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a7adcc3f76e35b071b097d00196681a15646a0f6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3ac200ba3bdfd4ad6c06934336b14900a0ea01f7237b183d12b0823de04b590
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6b1aca271711adcc11e4767c947068a9bedec4f1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddb73c193ff27cbb453f5062775d2e13900624f4ec589db83f60d142eed54534
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1155d1c6b0260d77b257f557a8af17d3b6375980
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dae6544c2dc8e069fe3fcee241e8ffc1ecde181e347a3bd18e0044ab78f417dd
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e8c2ecd0f6ad051301a46ce4184bd1767d9539f4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:075654c39770673623174df0a2308e1274c630fd80e3101ef375d333538b5e3a
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0a94c8f192913d1487befdacbcf031a8d5db9d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0074bf70767a131a67b7f6529a8931fbe68d085e17d5da2525bc930970dd2d80
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0c8928896e21998e36d30042a09af9b2033d0c52
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c45998fe072890d990a9ec2e04993d1ba5a944136cb3a8bf419195e34a48959b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..02b4447d2c29882b02bdf038e74b60ab10d80516
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d5fec23e45276bd9be8ed5a7846dc3df5c7e1365f1e14334970e145f9c9b6af
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4d433f5767e5af3b82c950f8dcaeb4bf0516545e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9e27bc72b4324dcf2a2fc164d8a8aa65074f2c459a1c1d21b8013b6350e166f
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a1bc367ac8d9f9fef4ef6583dd20d753dcb3e980
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a24d559cc625fe943eb1bf4a170c15067f07492b6d33590f8f3c1b349647cf10
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5bba99528deb4e91b32d0cd865f0d13d056882c9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0edf536c890e2edf74d9399e273857668a96a12c4b8d44a532fb85793f5c1ac4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cd780bc77b91b9251d8263c4aa802195cf2ab210
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2be6fffeaa08c187a06673667037b2385796d4a75fe4228f57c181140e07a9b9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..429577cc1b192ccdd07f3a71f22f334d26179617
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:071102a1c90f2a123cfd6ecf5c32e71586c87ce07e7b0faca227742d21a5ff40
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..90fea71f06247ebd8cc19edd84dcd6935d106642
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6a9d2f62c173f230c586723f5652ca628be06a5a6cf7c68652df99a92f411b8
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e71f866d9800c070c56b3b54f67951070564cefa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fb654b5309631fb49cade91f7e2e3abca81fbecce2c80cc9109b9c91df6a378
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4db0fb3aaae8ab4a4a8dec14bdd941e15772bc77
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcca64507fc925bc062ec3f102543b86d390113d677bbd9f615eef869a57b915
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a8e5966a731fd8ebf60631e0df26aee56ab8bfc0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15986fdab84b8c791747acc0395baa8e572474f86df1cb649701fcfdd818168f
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e9ba44f8a3c07144b87da7101c62aebbcfbb76de
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:593205c650113de3c36f151cf9b1875588ebbb7067e0c68dd0753d8c1de88c17
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a246370aefdc186233f5a5193df1cef07c9d84d1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1215216ec8b4d6b67b7ef68aca060166445caccbd6f9f25d698b035b5e602de5
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..65e01c9064f8482c27e17d184745a92c75db367b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d41253787a936a8fcb72b37b6a6aa7e0208bb9d9e1e692935bf6b1265f4ee3f
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..763f6a9e2c7d08b734958b41574ca830cfaeb6cf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1f2a619069f096721af4c808a73dd598b51601dc9949bb74f4a5c086b6813ee
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5735ace1455bc78740f4265b0d12bf3143df5e0c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3353fee773167c2188c3ca1e32842dc01eaa4f8a6b0783a1c660eb52d0ea6db9
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f270fe0bbe0b6d075f2ceb7ef00a9b2ad44138f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6883fe4399e22cba0db6c14477c58ec23ce234b524ec43c04a4f27606412cc03
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..83ffbb84ae70790b832fafa5ec393ab616e1c53e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14af8bc451ba84312cc87dd9b8ef63a0cde90956c8b2a41cecee60c0075386bc
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..740245319d459124542b0c414964e4d2dbc207ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee68801ba9a6333a3522d75e876abfe182e5369537b7ca76d082c824e931b923
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4737daa94525081e0d45b25fa543b0bd7a0b1b7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:770226acd3e907f36911829aaf1bda9a3ed5f5a51a07e87f89d0ff9bacfaf9c2
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f8b08c93bf1fe7d09e5e840328cfe86e7f4b07ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fcd1e278e64db12bfcd2a9ed9a9451e89bb41772ff63d0d2b883be55ac10e909
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..725df3ed5d1f58bde6bb59746d1c0cc4d30b4bf5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40fda6d1505100a255f2bd757fe1adeee0da0fa496cc68e1d8df3e6c7a903532
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..51f0b17bca844d5567fdd86ecccb5d16d4bf8115
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:523e04338f894c5d17935a90ced49738dcd0bc005d095e83b83caf27de3c3466
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..40f78f5667b98fafd2c286e8305c7d1f852e9da4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00e2614d744a0118a9c556d613b8cd6e9d190d078cbc570b3e80650cdbea4195
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a4d0d2023cb9ebfa0f06320b997a6a6f18f5a550
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9acc788a7516f1affc51cd853a2781428eefd83eb1172492dd96ab3b539ccb2e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..763364b22fd6a40d1ae3cfe83210bc553e14e387
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b1cc5ce3229d5ea4a7c6cd1baa843ca8bf994e213e0414e4e0d7d86cc98ba78d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9d1e407761b13abf515e58443572cf3cf0cac4fb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68318308d8941623071e1ee27a8a9b5468c0e988fe147589b2cb8945f8334c14
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f794d50e03465d765a8907efcc7c752257e92aa7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31d4c8b33d87ff1330259d9afcd1ac2a40f1c60e0055088c728b8e8acdeb3ca3
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0c03fa2e762964d69ec09ca5d6672de8677f9be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f4fb41bbadf19e7830094da292d170b80fe3d33622036d2aa4513acedd61c79
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e256c40ba239b0c3ec83f9ed2dcf82d12c12b8af
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7eb22a01280b38ccd20c74cbacb8591b6655db7576a44bc279af8111e9dc8ba7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..229b2bccb8c7a6fd659b1b2a0e89e6b84fe2e506
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9539eb26c092d6468af716d5286a65ae067b5b13c50aa24acbeee778330d051
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..44190e1bbff537f28052b5807156d45250fcf68b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:783a971f17f3d408d1a906e951b0bc43422c3fc3378cd20aefbda87c2b221069
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e278ebe21425037a77309bc2eb768328fec09739
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97c712a914672d859c612a17384cc4c300f43d46a01f8acf57d1eee7c88d0fd7
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..417433c0494624f4cd878233fc1570f8ce9024b0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0dc9f7c00e01f42aeb050849c1f5db835da0e4930769e958783731f57f49b9f9
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7caf7910f42070875654c71cd117d7fc17522061
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9cd9a40124d45b0341eb3ef06517b05d13e3552ddde07c8368e480da3e914f7
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c55e307d1a2e1199b350c7f808fc75f1df84a88
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d6645f15fb64820dbc775a3de14d00499b3af20d65efdf5a45bded94771c693c
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e931cc242cb7b679fdf98bcf1ee9efb649df78ab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d473d5d34ae32a98ff631132633a90bfdf62380c443473fb3d1005654804cbed
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d8bf852b1410a1873452477a0ef9cb6dfb934b89
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:574443792929055cedb9ce542bc04e274a33b40b652e408e76a77fdb5dda3564
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bdc4d7c13f8e556e5ac2043d0fc9f76b5c808074
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d8adb84c11d836f1c0cfb8fa3e7e528b4a582a8da2e224136dabbeeafd298f5
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c22239fd8096cba285cb0b21a50c03a6ca413d98
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b86ed338dcde2a538b94127c63ccbbbff17d927c147b20d7917de9859764418f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..89f4157a4b77d7b876604b66d08b5c7167f6ba00
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bfea77edea11c878cf3881873d732b884bd9a6f5ada1912295ae63d1b5fb559
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..63dcdbfa9a4ef183b5e2e7bb6c7501006cc4959f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35845fb767e142747288a39e1dc57aa10e17f58efa1d8bf855f38e161aaae866
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..447c345b4f88a71191fe37a0b571f6596d6fdfba
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:44b52551fadc23b90df2797a2cf6e348e5602c44eb50267fdcc77234e283ea40
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..180af1a9a1640e3e95d8dae5ccc504b4a45b8a27
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01dd58c1301252f4bcb1c2e4973203a957119b57238ea27ec2a359e991f075c8
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9bef33256ce3df37549e5eb9a6f7145f66f8bab1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c664c63361b88fcd1c36385db8b2af13f27ca71356c5c9b5c814c28a0055356
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d338e344620bd70a272c58b0643eef28b257d04f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fd1d1c958e9752b1cf558079d5304fc64e34f940771c783acc36207209383f3
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22dfee83111cc6c8b50dfa36c650fa33fe589fe7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e49d86996ff2afa21857b050ed3ff690b123807f76a0cde146b2a9acbf24e11
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..27598ce0e038398cad45b0a1ea4867444377d5d7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19e5fd88f000b97e55d0919dc4aeb5095fac9658999f850c0220a57bbfd602bb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5998952c31f551c03d6b88fb72a315b686f72165
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a07148510223a9c577ff33ce5a828eeec969c928d44fecc3572dcca209cef457
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bc13fa47aab7a5e2f60bf80906a6871d35cddcb7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ba3ba95a4c5efff95e9286e5fcc7261f72f871e69c2d30a5fa6cfcb8726dec6
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.55.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3676d057132f9ca679a2bfd4125a372c85d81c02
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a75c53d0325d17d06cca57a4238e430f18f0947a2f3bc0fe96e45178cf92772
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.6.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..50b8a15bc3d46ffb4f233cbad5b050208c815b82
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb41c23c7a62db2f2b01d4087775b4f9858ab2918fd9a083d652e28189c9ce27
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.6.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.6.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dd65153f0d761b26b9dd6773bde62e9c93505193
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7781020e019aed6c9098f2d965495aa8fa2243d3675fec2f929bb499bf67bd2
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.6.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.6.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..14c8972ecc796eeada05ddc5f01db2b57c07886c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:885232f061160c4a5bd1009cf6915544b0f2df36d0cc2ee88a61e0394c4bcc5f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d65680facc0fcc08f9cf02ef3c3eca5c4a87481
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11c7e6042f758200f53037bb8d4d2a3937765e87bfbbc3e75c059956240b7b24
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c6bfcaba0f6d1aa16e193ba5b43445659aa772a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6543e453122a8fc010939407e26c71428f9b142509e43d580ef7e66ca0586139
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8174741bed9ba8a9611b34fea3d2fe5132544986
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f323e41eafbd7571d6ba6bc979f0f24f994fe68c229a1e0ae875de1e125b2513
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c2318c2b39f67c3f6171c8c1aebdfd326ccd640b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c5429509fc52e1fce3c281d9e20616f5de8dbc12309953a1b7a60d4c714d710
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0285ae366ad97e5aa06dc561ef5eb70db74a1e4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71dd0a3e4cab1d239efe352c96f107b84b6b99fb673a55315efe743775ee1fd3
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59ad69b3a037d7d8305edbf811fa622913a68e39
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:861101179db2d79110fcfa738e9be64bd2fa47f88cffd809037224f75e637597
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9b2c17ddb86e3cc52f3ef672fa765f41b614d19f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4a2bb1d10a72c866e8899fd579a79e4d9cb70e6674e6476bc49ef9805e5241e
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..024f72a68996201a5fba28f5dd3772c63f7ac11f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30dc56783c0216d3bf2cc2405c034100a98d97a83d983f7f8337c440ffc7f382
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8e74ba488e31e854e84c2a0d7446c7914a7e240e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3013116827de39ac44d156c0e2c6bb89fd880c5ddeb28df54c40f2e49aa21cc
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c9b1949155e7ac6e5b7826566f436d7d7e347887
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31852cab4c383bedad5ed2ae49c318cdc48488cf782bf66d8ef02275e3e54315
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..107284994092f3cabef1978763f364a6caabf20e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc7c68ea6e8a3d3e0277c45be8f8211f1b6fd847d69db0660719427ff2aaf6d4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3e70042f96e0d6bddf1ceb03536f6a8bb1fb9c39
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6f60a038429207b8eeb6168dd7102a3b8adc994849b83b427632dc33b4d1d55
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6d442a09e838523fd04301c75a531391a323ba6f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cc026d072f177076bb974a0af1b4d0ae257d9cb46171d2be2bc4adf8a0e6bfd
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..19dd316532594c03daf019f8499029fdcc59446e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:759bfd3357b2f4408d63aaf0f5576d95d59743427eb77439a04303340a259a89
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c229a302f0839a8efcfacdaae9a529784a8840af
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a405a4d20b8c40469fe9a22b6f708e67bb1b3d8f770d4f7bf3066233625eb4f
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f90606cc04193a4f1412bf86e47c6c377f5e68bd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:941aad02d67d42bbb6f0020bc717f826f0563d85e9f01ce74ee17593086b3beb
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d69ce470eb18218592fb37d46c83369936264ade
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a2584eae711466c3a631f716ab6167cef471e039a019c88ad034e6ab44ecea4
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d75976127b3b77e01935fd1bba1da93d6136287
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d7a613525dc6f04b7c4d97b9d7cd8c230674c983716267b4504649fee83ed86
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c684e7e9fbc53bfe8c1ec654f61b509484158587
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af005306bfe3d348b8ac75d62227390cb8f47720448e9aeca2718bb0d5d234b2
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..debc1522de60e1572024f74db5358a07aa3d6029
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b939b0ebc1cee06ad4a03b5fc040d6b481f04c5a383f3937ccbf3a07c4462da
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..007bf91cdc9dd3c527c8564fbdaed14c588d2a27
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f10a99e892e3a103cca131d37a7529b2742f59c9e25e470b71bcd52ff1065693
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8db4dfb44a688cb3dc127b58bac0b12c5ea9e852
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5ae777d3a138ac01ce18585b2f45d3b53fec25377f2ae132cd8fe220761790b
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..62f0022e87b337b04bae88fe0219d440451371e9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df510210a820fa35274a1512604a97fca6db043676afc6409d021014ee03307d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2bea59140691c15ba61928525aa3ef4eb71f9362
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a39e4085fd455c8415a5591047eb039a9d8079333e3da23198058fd412c00a61
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2bcc50529e75617e0d9d0a16a97e9974a98f2c77
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d93da8446e68c49fe039da42eb3724adc50e46b1917753df84d6f7720ec8aa4
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f6a51edf7d6f184585abfec5d4ca795bfbab8e2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78d8a9345f6efcd5e1a55887e911ef49494cebba0bf79d3db082fa018a9dd381
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..366468f09c00d4f2f739b39c9605c7b35fbfb7d5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc41b23ab974ab796318801d6d1e40c309b28b273c83650435e09b736bb7a1bb
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3b8ef0f6df7886b584598150bfb90034e5efd114
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed6628f28c6312509cf8d4f07d5805994df48211cd0287acb7d3e070caffa5ae
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..29745c279ad2cd0c4976bb66e81f93036514537c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd0b488b96f346981640c7ea1fb7878da0cec61200ebb05f6df659d0f722bc4f
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5bd82200a401b9a1a1c262a48f8e2f81846955f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34f4018465736ebe4df49302aa90020429198e3c64a76618435eb5aa8164bcb4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..accae168588841751a06b2a859c794defa494139
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:298676d282df3ccf2e7e2095509d7cb73678dbbc8e4b9bbd96d38f0eaac4e967
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0d27f8ab93da29652e5dbc4be035be4ceddf72f7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a74fb4b4c3aacceef5d45b88119ce47d5b38b06d86a6e4003e4a667e66d70f9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..37c8e9bdf69ff9dda7d13fd4eed0a5d01f11c176
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9898c65a4d73b8e785541561db2eef88be4985e2b0b5f4186eeb0443f804017
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2f00d4a55cbe885eeb5b816ab7389a25c88acbfd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8ba9ac11b8c33e3eb386c3a38634b92917521b108fb2fbf52a356496f5d5c5b
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..73401b49ab28d5f8ea783351764604dea0853fc3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf4e66b0acaadd6233ec34d3710dd6b5a048923a444e3dae4e3001bd93a810a6
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..79dfbd1c039acbb101df4cf22c80165d1227e875
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:891c65623fe0b2c91e81add1b193b010395933369cb5af3121e8ec6b5a991855
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1a02261fd02e154b275157be2a4d96bd6a0eef02
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2366963291453caa3b41c1cc48178075c129d58694719da595ffd52c6f72fdfc
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8da7fa08569c7916097ffdc4f37243444528b032
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8aca6d6c2ab3f6d89555694d25784e382e3a499896a6a32c974ddbabf70ba7a9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39f866217a859025741a91be31b033e12e57c04b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1b432b9c7ccfa0eb7ba2757f83ef1477642ad4b9fbb365c2482d47ddeb62556
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c887e85e4337c3a369f934fa9e16bacd424f5ac8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:358ce5c2a7d9caf56c0fd926b5de0166c951bb80e2cb43b0459fe0c8d3fb805e
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..829cad50889063341eb20ae3d196d3b4884424cc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d81a9748f07cc63a33aac0de0bef3c64688f1acc8d3f4fb52f9f3223582d49dd
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a879a937ac8b2a12ffa9005121d792ccba9fa78c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57abdb001460d04ea46ea9aca23b2c587e398ceeceb87b8ee00a09d7807f193f
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..db2dd27a7de351ae4c001eecfffe72959872f57b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:683d9bfe22018112cde1d0876260755c4ca792b8e449228a92529637028ea04a
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b0e310476223a61ea7e66b914e8189b4745a4ea5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b76ace36081cbf88877b03e45bea56677ad14c64a7b7dcc5790a0319f5b6de21
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a36d3f5be50a671de8f592a78406d74128ac197
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:901a7153e8853dac5cf2afa171a89f07bbc14c53d97992633426626c2befabd0
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..71b99b4888e49530e70ef2d83bedc937cda0dc69
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef5c404add423b64f06ce6ab53eb1a85245406ba139a9d17f408919298ced9f6
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f4dbab53671292c98d9f331b437a5aad70606922
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bc07415713b8d3221a5d6c3246bc4152732a1d933e4b99bbb09cbff8a54987b
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..978c44706e63b890b86113edbf0da00891d743f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76ae0568d17dfed343b8ce1cc9f248f4d45d6761b5561734893ab19ca64d4e30
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..41d9ec7826b688f6993ef9af2037d3a6a2289dfa
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59d1674bc7d822ff2c37c6d11d803e20b9433ba49319080a1e867437a0507876
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f6fd82e973aeefa509b8a3fbf84573d413359ad
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a10c0bccaa173be47982589a47034cc192e22f68fab044a4699c9810062959d7
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..965fc7f085bd3efe204cde78e12e65aceab2744e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f17d3c271b0438065d8ee48e97bd625cbeffea0648c3cd9fd3d4db263a8df189
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..96ff741c33ed9a6413a52de6d2ba73ede7fa8504
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e2aa72c3cda352a8d0bb15d30f154edc49b8ef2d638ef282215e36d40f486ba
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..63abdeabcd6d92f09c921402090e971bf6e48497
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:586bdfa68a987d35fe3285ec408b2044094c743a036a3c2913a48654d3e34a2d
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a8131a87a67fcb532300858c7d43665dd1b2f57
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ea22639f3795039c15f40a9d359e9ba5da9d9d4cfb45c0dcd2d6bd6f9bebfa7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..646c374aa136d8dcb199b72b8bb1d70c450ed534
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a87dd5679644a00f294e96b516a5a69fce0d696e863819cd483f9d122cc075e0
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..09ed2a6f1124945505b76c18d1efc2837785af38
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df92818540a71e9bef803371775c92e8ea9298b93b065a9cd9da181c0be3b6b3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.6.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e0ad923d1f265e07d99a2ea53e8358dbdbafedac
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7259ee87beed80b31f18f0912fd78ae3ca4cb7afa39b4d2f28a2975f0092b663
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.7.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e572ad158c69a13e96bcd8ac3f0c8d5e46eb2652
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13464ec92cb84843f5e0ed9299415b5ca1130eeb6b6f1d69fd80cdb066267772
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.7.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.7.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ffe249a738a8e302042f24c90918d1e7da47b100
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67a6905b89a7f0a347a3e44dac05e406f8754959483b2ae14ab90afaa45cb64d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.7.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.7.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..05a9d30fc9951e20b40f55876d83d08a60cfddfc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cac882b4deb4f08eca57bec333fa5bbba7caf95bb7ec144c2b610ae0b3557edb
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c882978937538bda7a81de2ec9860521476e46d3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:499cf0b9df4cb2cfb018af81297c38a93cc88ef02b77c2177abb5bca5f838aeb
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c4acef5583a9f8c1223165d0aba22d0167d9474e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8bc5656b2d8c1e11c6545c671e855309d0593dedc019104d152068c3bad31f8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f9781da6ed5d41939d8843609fa5a00c1c6b5856
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d5837b22e2316d085de145719811472b2ceda8a35b4055968993da9a1487855
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4a5da9d171c55f666cf55b721722d7a4652f0c0c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64c92dfa664e83b1ae7cc16fa39a430535185b18b6f114c16fadfec8713f07cf
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..838c7f5c4cc8ad92e705ce10e6833d57174f9d9d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:596fb6fd06c568b41dd96723c8ee3eb81ac9267d5b920fa4e49496ac24bd1f52
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..87b37e56e974e24486c12c5858ad64f31f3f4a0d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee6711ea31cbf390d7b12465022ef89ec11de72147068c32340753a7fcf56095
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a5bbd3a4d68868b9fba7e62fc689504443f94b5d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41c60605d91ccbae8231d52519228b6e9d7d64346d35bbb83c3cdd1f23aa73d5
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..17f3d215275f3b26d31771aa9fe4dc3175ee6949
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0928bfebb84e4335c763e595ebce19b97db15f1aba5ef2bb8511cd2acf011d2
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc32af839fb80a4eb83b64fcc11d8ddfa0acef71
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92065a6ed7de950f68def6a9347a3e28bd66551e2ee1f8faca523f9078682ed9
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d265d579d4c0067828399aa7d1502e05737ff939
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46b65978c0d7fdaba6a5211956c83848292b79c735e0f85dd4fe1a1ddab2df02
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d47a4fc2661ba1af9a6234f940b096f5571eec9c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64c40b276ff369b16bf48575ff67c37d29ffa316fa6676772489a60e3e796fb5
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22910849a7d95e80ff7ee8819e6fbf8dde03adb9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d41e442162ed98da886fcd0223e613a17f7841642b9fbbdd632acdba25dec793
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..47eab7865e5ff45932c3ef9c26ce5aada2a438b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3e2c684f0af172f8ad888e1f578de89b1bdce0071ae9ebca26c873d9f75e93c
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3477cc568dcefadf523ec22a99ca03d1d99c0ae4
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8be0849518864f7d6b0f082256affb2b9ea8e6916211a8ef171a356dfcbd67b
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b8b2d76c599c59aa262262619bac5ed4668fcf7e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:990782960475e411803cf52ef69ad5fc8cc7a24f80bc73908c438c01de354a28
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e95192853aa877217e90e6ac88a058d5573b8cd1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e31223e06b6ff80b899238ed99c2dd54354a227cb20f514f57468127a8d4b90
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..316208b7b7b89fe27c9fb4bdfc7baa6d835aa71d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9d87f029b07ceaeb87e3b002bd200b66ac2d726269c60e456d2156d7081637b
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..181c00f9d284292812da761dc67972adde537bea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbc5172102d7132cdb1439d8f866faf98b1e2d0c1b4dc70a5fbe763b53a348e7
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..aa5a73032a7c74566bf697dbc7a2afd771fb2b0f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b11390502a8a33c5015162ea6b1ac37451551cd1807f1e92c2afa6ead72e4617
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3608780a36097c612f7e64831939181362722ab6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9608558c5a78e11ef09c8dfaf16a990eb26386dc2f7e0e8d309b01b258beacdd
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0ef6feddde7ec2f9c06a4653d433808c15b65d3c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3782d0e367308bdf230e23cb0a4f0e935f6c311d94a562a29f744089f79a0fa9
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3f30f58c18b71f744c7dc18e73dbad1a9c195941
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59ddf679511bb7ca18ce967409ef93caeaa55ff4804738ef77db494044b21f7f
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b05dfeecaa47681524334fcc85e63091fad2c6e8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ffafbfc06f20649c292933a59388fcaee4d3c4d3d3d27b31b6909bc43eebca2
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7425fad090883b574dd510bb21a0937a310b8694
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:960f5b648341f6844cc8cc0a098f03af6eda7541ba1c570316ef77f98f452674
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..45a23cf5e5f32affab0392aa71446731d53d216d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dfba00a5401986187a438229eec07fcd42402459c5042ec9b5b9a97bdc58b9ea
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..389f733880ce99803e27a501d3212d21a2144fbf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:deaf14079b57e447a118b436f92d724b1ef764a7587b067bb46519b97dfc10cc
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8a7b657bcd388742ccb0065a097ca9de71555c53
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a667fcd31d35d7a45075ef27dd3d3ab7b4c1b50d2046f2e37cbe2f9d52ca256a
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f46ae06f2ac11c1dd6416d62c04e91c9c8b5dbd1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41fd13b336ebe2cf3cc99fcc968ffd515592a2b39dbc8beea6e0e658dd8db01f
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..86ac6489e9d2a0f015d0455d3c4f23eeff99d188
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3ebd44d452daf0d0845790834a8c8efb965456f2c926ffb9d40942e6d620322
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a3fb13f2654de5f15b97b547269b423270346b9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05d6399a6c226bc3c42dbc856bf78c89262f2790d83fb30cd6c9c3b2dbc3de38
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9a36be2f2f5e4aad7f2b3d751d4c832c4d9cb04f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49a60890094cb3bf20195ebca00ceff69f6cb024b887dc44e72b64b259ca6424
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6c1d8a67c69d2ae594c672d468aeb6b38dbe8b83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d8f0eeed33f8992e8352e26a62db9a966157ef9382ba908457d7c002fe32ef3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..386ffcd0dcba3ce912ace773185e1f795057fa05
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb632f04a6d6d48109c8bd983a431a71e633354d4f3a8570309424e306cdfa85
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5614e4799228fc2258d8e6de7857c34d5d23c9b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb6c76115014b5b9bae8045723a3477258950a66bb535edaa41ae8b4e53add57
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d7df28e265404f5fcc0f87be0e6307cae9c65301
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74d490f46aee8278a99cb05377722288f58d075da53bd0a7ff63512377a0c010
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a496096796d191a35ea4331699814fd12efc888
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7093f87661032b26c0b6724c1ccdede7e5ff047f2da398d515bf5657f28b2d4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..47e9b7ac43aa2c72cd1fb6d9a67be2e3cbdc75f5
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c65d739e19deaf60fa5bec3e192c130e301ea11890167685af9868c23036504
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..af201dfa93d3ee5158400247d628bb72e2420a34
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d37567cccb4319823c2daddf5820288442783c6abda7006960893b0197e08166
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..39b5ee0bae10d625b312a92a830bd07a9daf6286
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76a3fb9274bf31c9a7ec04c2de6282300fca34cc139839de0ce2d412fc336208
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..693e345537140e3d30f888d9aea79e5b2cd1faf0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16af0cee376bfd19f2b353945eb8e2c03ff409a6d70341ca9a696eb60f1076ca
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a75b112baa6b0cc359ff0252debe1c0c235ecb79
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b2ff16387223b5980e5fb28d0d8cbccdd5abc55b1ce2ab85bda1013312508e6
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9875438d7f999d93cf7491d0f5b85cd46cbd092e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67cc64968f74e77b9106fae76e9a983143d192b230f13f7517820ee4a57326f1
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e6364ea5e005f5210e78508e4fc5de3bbff73de
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ca1cbed787d8762567b026a3d39d1a98c7fc24203572ec7bbeb582fe26f1dae
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..40da6c51dac8d12900ecd899e1297107064d192e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2baaf439550ce193c6825ee2c1a0ec7132659466c7e469c6aabb1b200a6cb3f
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c630799133c9bd25717ff05aa86ac0671b2151fc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26775392a5b05786ae40f840808c9f8c243a3dd2924dbb43658fb91e241e63d2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46ef058e1a34d280d8b7454cec742fe1ba7bd6be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:069cfe5c30720c702e3d1999c2dc25d89ef63743aa1d6278bbead5a64cd1ab96
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f3bba499d4af76c94877f4ef73752375034eb287
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:077a34225492c096459c85227b73f9d70767af0d1c3b8ed739aecae9f93d8b27
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e83316c8a481adce3b666abd658c19e2ad955f9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7254671cf3a807d7af030aa5bcad4d2e1460a82c239b3437f48589f0e2cd4d6
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e756f9122f37171837669544d124276cfe8ad6f2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b0f2da7dccd2f7fc5fa8aeb600543a4f66cee7ed6808c9bf2a3eab401635e3b
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dd69b8c852fbc79084185423552c25f5e8d6a623
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d06e2f613deddfa82e13a75aa735fce83341114f578298116a25422b621a2fd9
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4f507595669d5c1c8ed6f5b340363a8ca1325132
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4d7e275c40e6cf4c2abf5c19f54baa52ed874f278c81c7cd9a252d7f0e23b6f
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..18abe866d489c223b19e2e46f4df22d7925224ca
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e5fd6de436d8be30dc4bbeadd861785356b727283fa3d51bb48efe4151d1e3c
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7a5207a7091b9c316d8df32abe704875bc37848f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c32bb424ed9999fe645466405ea02528e86a515b2a403dbdbc6383dfccbf8ca
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3e13cc6bcd811ee01cb2393e64257d0aea9ee3ef
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:896677326bdbbffffef12020278280c3154b11c0ce518563670eec292e550290
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..760b7c96729bbaa237ec002fb9858f5d8a8fa498
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6eac2d0a99eb63dc1b470888829428f1b2d9d1bc73f187c22532aa942ba6541
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04cc5846ad31e1b86e1ea4eda87c6ee4e4e8bdfb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da4c537a11d7c566683cda238f70222d365ed2ab8c802cc54e352d6112be2eb8
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.7.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74eee1bf3d81ce46f9ccc3378f6ede9cdd99c2a0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49bd9e25f7a43dd9897559dbb776105a48fcc19c582a14830aa8c3065ceaaef3
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.8.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..46e252bd1fc61e6fba93f3315d6b5aafd7ea58d8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac52dba092187730520919a5021f7693433a1f0b1156489cea27b1dd4ca3db19
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.8.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.8.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4bcbff9bf3a6f49d727a7e7c26ea0d9845dd7111
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a5294513289e50c7d842b9937d73d5cd0f06a03f6cce41d59eb75e3340a6044
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.8.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.8.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6e6de91aa920b02789badc845f1bdd386cccb5ff
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca78b1e5983d16e3d875db431e47d79fdb83a61e4b9d786395f6fc76d26e23e6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4ec9b5855ede9118d5d789f7e4e89d48177f173c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e4acb7eaec236a4919fdf989e055ca04d03178999e349bdc25b5458056f1bdf
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dbaa79567422357c633037bbae5492ba86f965d7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f2162c0ad2723474aa02398c451b036bdc9ed7fca96dd600e1e79c1e56240cd
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c7b1754791d495f6b816e4a864e2b06a5209b8cb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b774ba7f95d1dd843a6532fda61859a1884de105748d3fae8cb58b5124557300
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a9e2c4e2212d080dce086f5195ac9d720703c7ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7d73a35edb1052cb38acd4fefde3f5cdb62569f90aa43dccdfc6e6baedd720f
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..04c94a37e0da18ca10a84c3c5211574931179ec2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27c012228a0d3108a5c91f336f18973835b835840592608290432161e3e5f40a
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d344b2d080c2b4efae85ed45b55b8079087c5d16
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07fba3a9de391e5ba70a1337fc2da28bbe6568561e364ec3a8079368a1d3aa98
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3c075927fb7994e391b5303d882970778a4c51f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e12965a82e9f897868326eaeedfec0532d7bdb2849c931a05db5e7c25875b044
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..496693130da112de4a239635b6da23a463d71187
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:249a52394d3d70fe85760f81c33bc68d61d7cb72f9b038cab269193eb7738949
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a829a39df71123975b92646b12647be964d58fce
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b8c6783a868a5c663363b32cea84adc4b990170d6349375fa1e30d092b2cb9d
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..99bb2ac5472b156fef49ef36c7252b2b23c89303
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af21107905d51413fd8f501d13f3150f9a0a4e515966f6b142fd5bf091e70e81
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0a6d12eb2f6de2e4603374ad458ec5a8c6e5d2d0
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c273fa719b62ced2e3435bdca966b258a485f260874f4f71d41a839d65d66bc3
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..70e91f63beff53482893715aa5f95956c047969a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5fda7e432bd1300df4854de2590fff5361f675bb16e90736caaec3a763980de
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dd0d4595f69483248abe7dec788c934369f817ea
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b81d61bac516529294548b9970585312ea08d5840b3ba35287370d509d33bc9
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..19ccfcdc5eda10f512c74159ea42d7cc3ec14979
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea0b7f7d6f70791ffb09646fc9788c2a9d446957514a3eb7c985b2820d709d6a
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d2133f3ace92e6ce9e2b1e381ab6e45b5c332bd2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f075ed6f33ba84cd6a829dc803b0bad13dc44b3197e5ab0af87b8f556189ccc
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cdaf59b268f4d824aa8cbe2f5db5036336bebe0b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13a82f9353747043aa82a148e197bca7eb2b6c5dd542821184ac6a7703bb99e6
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7c0aa0d84cc801c8d88f27748027d7fe7b77c435
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5e68983b9eefc0086bf64f613cef2d936dbd014d1c52717f2836fc1ef0547de
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..862902502a9e41d5d91bb426f7a269f386e68477
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6d0d1b067893d027f6ac7164db6faaa124aa3b888410a665b44d9f7e2bc1ae4
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5f5c62001bd2f8625eb3b2997aea290fb2c9f128
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae75fb512660a7cf650515b83d418474e82b31eb7951153299a722056302c4d7
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..74ebcb5fdc269a94749ab2ace8200b51430ccd86
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c68f1edb453e0938597b651340890953f0d67118e352ccb55585d570c7794103
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8d350aab17d08cc6d52062c4bfc5aa4e25b6723a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ff7e2dc5757d994ebe77c4d3ff53e44e39c20f3adb0132d9982882ce4e029d7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0b6a74d37d0233adf22770154bd413bb2edf198
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88f7b6dbcd88158da2b175d6415275cec279ec086cb27858b8f4ebc3f7a78141
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ed8d35988061e158c18b4b4917637229aa1a0b09
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9400edcb864ae83ce51fe8c4b6a978f4a4a7f79ec49ece22cc12c6a772eb360
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5fb5455d71673e6f1cc4fbd23734473af15ce4e8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:335521a26d9a7f0b214f5f3717493cca8b9f41ede27fa9154f32894043decae2
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..91eaf690b1435eb9a22c70eaec8772199c5a048d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6dd9a3e9e6b210073b83e13e3c224dcb91c6376426c9ba035e81e4b0144b02ed
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..9c95a4a7c4ba1dd2029ed410791bd6c3bc48a822
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e268f659e9c2e8382bcc06a178df7c045ca8ddee755bd76607495a218c35049
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2815ce4c0fb84eb49a79f47afcde8632da4a78ad
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2401b0bdcea9bd627727c377fc7b4c6158d0681283ec33f9d643cd8697f9428
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0c11e6d3005363a4e3a1585ecd51649d4f24731
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f81068c2a4d62b74909a6d19281ec6aedc23c3c9a1403fc4a283b2da30f78ff
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fc52459091868a9a6ee15b5c103ff0c92cdd0498
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc6ebe27a59fc5df388607746cbbc9d15e070b044e1ba360e19ac2e623203218
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..24f6565316542255e75b8dab8bc7245864226f2d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72b1bf11de4c90199b40d9259342bb01b8a84d5dd9a790d3381f2908c70c2727
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3376506d2e8b20a8be05af8fb36db4b1ebf77065
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bfa38a1251e029243063b78dd53552bd743558b1642d4a2ad557cf73430c6e3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..abacf785044fcb9bc89e725cc1a09a1ed83983ec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65eabb42488d465acc35de8b18bcee3a4e43569c6bdcb469dc77826bf7401045
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d0945eb99e9b70aa392a3d98233881eda200bb64
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99cd3f22ad3fa50a899b8af57d01ba21f73c5c4059b0b3620f9a2ce40100d499
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..38a65947a64c5e5da8cb85687cfb03af32d49d46
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6fae8dfa48abeb58343f7667297aec4f3c2dcad50346d02b4c228541c6f8b9cf
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..88f883140d4b344d65e060a7f4f9ba3f0cfb3858
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:360709c4bc02f5a60bbbd7481869b483086d8bed040b591c4008f2cbabaef291
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..34fc325141aac77f6f32bc4ff67f59d381068f77
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fdb29de8929f55914e16e77d367de0d831d447fd354c5c70d6059cec0354358
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c2852416607cf828cad09b445f76fbda3c757b8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a23de32abc8f61dc0f4f947cc35b4fac35953d421fd0c9e634c4ec90a8e1dd7
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6ad2f2822b24a7d3851d093ab7b6d15d5050e061
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b30470816f0868b4858d6e10d58f275abca8690b79e1260c7f1d319e8281629
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7e0c5737faa420bfecf6e44ca33daada2da95b62
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbb87bf84e09f9c10a4171b52323e0ef3c7eb1968610bb6bfb984747e51f3838
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..71be0c6d4b325b84ef8373462e344321357791cd
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1c32c51ec5e4e30754bba364b559c339343b9411d2d6e9ecd7157b6880f19c5
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..fd93abd1aa79cdd37125632785ffdfa562ab05be
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:637c67ba90b52cc567a35cc0f7f3dce5f5f18e2dcb0a398e91e4d4528d05aa8c
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..10f76038e6f3bda1717c912880beeffd1718114c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c738b9de399f81e533170850ee9032bdc8cdb3a1bbb868363e371475a0a60810
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a5080c7bebf7e28fc89e321464275f55115a9ea8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6be0154b419175a9eeeba6cda70849d897c77d9c9cafb6ec52a4dd516ec0d751
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c64498de733fd4db210095c85d5b1a34964f67e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3a74d9cd8583a0adf2bf0693ede715d2a595055db2e298703bba94a03b6f648
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d3c96c38267282b0fb49ff08b8f87c372ab29a4c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8517c3446d0ee5823180019b77e26ede339ea6e4ca28ab182ce625a90f93b08d
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0c602285adc8c3265904e688b50dd7d5342a37b3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b26359ddcd5a6e0849861c7f23c55f19cb4f2d2dd037fdb2b413af864b97d99
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7dea61d2394849941ae664d3eab1696f46d6a219
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75820de7a8a6ef939daebfa043d0f17cba03b7996b943dd085e496196aba5ab3
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5c4155b097cb4be304c55b050d69505aa64f9b6e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bea231830ca0328d40040f53414d483e348a91f196d19797ada1f9f9f087a9d8
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e8d6313812672e22155ff6dedbe716207a65796a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb9f4380431068ceedb48baf33bb1d101dd935cc69441a9b1e16179abc6cae10
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..eba0ec0388c9c975a50be892404ff8fd9a226912
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a75ee457f86832a148884f169ce074b4ed15abd49489772fa438727409174d6
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..dd06966ad8fc283dfdaa149037d3af3495580cbb
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73d8ecfbe884fdc1276fe9f27839f1a5ba8b18ce1f4dfa96f168e9bac49c3fb8
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..98c5dafa97ee08c57f7d32176dbde89551e6c717
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0b069499e348098b0b38ced40999d80cba349192731718a33fdd3aaac9436ea
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..809e3ce056775009a8440eb967118a4945441d6f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dc3f72273f18ffb5b1665fcfdf72a0011c5ab86ec0f56173d376692771ae57a
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..51abb47b2eb8dcbc9afd146812322c855b0cdcc2
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3038795624868cf03a420405c32602c6ae0f182f4da665439dbb2fac5122ab6
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a90532071574bb60a9aadb2ab6e0bb3e3eeea7d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11ea5bf2e92fd65974c5bfb76440e607f6845271ba70ef00c8cbe397675f1577
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..66932896a42976dbac8dd10b3b77855525a26aca
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:920d6741ddd33c4319af1defdc0303e05e0d7533fbce564fd73b3f0582371ca1
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.8.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.down_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.down_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..acae13791d25f8daebe59c287996aae3c92c22c1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.down_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df176207b5d7b3eb14b66bd55257d1206d583f8d8213fe43ce95366caee7d590
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.9.down_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.down_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..22ad4ad1b1e1da883d312042a095f4c2749ac456
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.down_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7355b6ec47bed6a4f179288f7e0571c48246b68870b738ca2ee89b03b76eb80a
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.9.down_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.9.down_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5b8ef65b863b48d27e101d5980e857a70ab75ed8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.down_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84d3073a75710cf169d273b2a4b14040ca0cfc6886e075513dc4043f92f1af92
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.9.down_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.9.down_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.down_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3fc1f607c7bf61c02d0fbfbd62cf8e1df73993bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:812b98ba200a4839430b20e0e3a10cd69b6ba18f9ca3e6c18d4cd8bceddeed91
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ccc026fc1b34e18a73f1e07a2b8772580f073910
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c478b710523063c2cec3cb094471028f1c9469bf83363551558ea05c72fb197
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8f5cd443a25b13632e8e5a91f2469189ad870c93
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9cc43f1f486b69c3a994399dfb3e1191924138df2dddf11d7fe3a95278876744
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.gate_up_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f7e466f8cfa40915223f101eca72dcc380114fd3
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:829f4964d8bd600ebcea29d3a96c676bc23a5c01745e7934e564427f57de7a98
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0ff39a39d6769fe7a342d8ce63d776e2cb05f91b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bfc67ef79654ccc223991a0f616fa3b1c4bc9a44ae5f97bfc1e1f4a49364ec3
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3d617c6ea1bbf1b386decfb8c986bfc7ee69cd1b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d2541628f8d0aec79144ec7114d5bf78c844351e7861e5d79e365c42265e50b
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.input_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..0ea7d5110277ec9d05f47ccadd41e21d2bf97af1
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58e1bcffc26d85bbe2e2ae202914f611582be4b4de47e043881962fa31ab5509
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7744640e531bc12adcac2e5c43320f49b5f06e27
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e1b8d316602a7fbebe1502dec66aab0307c910a4514393990ed14699bae1ba3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f18c56b9606a47946e66a61541c435ac358b362d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a715599aafef049ff818c08866bf22fba304d590dca235adf61d0301ff06723e
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.input_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..32a477f50980d7c33b77e60c23d5b6c954f4431a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7bcd4d0a9f542bd3b8ab2693af25b67bf978b2897ce3fe1a67fe9c6efb61e71
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a06445b4c2148e987843ab75397550021d5a5d83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2e2a21e8b8683900007a90962347ae4d2680b6447715d49a3ab64427f8082bc
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5d0eb341b1d58ca91e68dac035c8556be28d4fb6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:351684e668930a34362de21f2204ac85f0451ae79cba17f57f8da5f5b52ad984
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.down_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f929a9616510be3c3aa4dd51a66efcf197753ac8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef9d0c3fc56ead7e333ea378fcd845540ee893e77930572d8397bd15afb0b5e4
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c9f50f574a52a21b1e7753bea112c84480457eb7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07b86d009ba22dc6e0a8c683356276cc146f002f9b076ce580fe1d06a48a0ce5
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..056265b0956721e88dbbca9f80c007d53b8895c8
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9957cda7778372223174483b7a2856bc83cb0a34a5188a399496a72b83d11a5
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.gate_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b3c8c6113decbf9e8278ed03900180f35fa8c675
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64ec8e10520cf81f92402533069fd27346797b23f416c5bc2c22a0a270d62731
+size 36865244
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f569d146375d224e3a0983cf16565e1779e8aec7
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06ffdb33815dfa3677079dfb7b2e6aec6d205ea2298dc01ee8ddf31b6d3d8509
+size 36865259
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..573dac3af5f2b4fcfd056662d003550cf3086e71
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:844396fcdea565af20b6003207bb6e612561ebb5fedcfdfe3f69b6a73e471fef
+size 36865165
diff --git a/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.mlp.up_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ac68f4b63ca5a2aab2e8baddd1e10ec4de3c9d89
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:816554a0111ab4cba0761482672a2a93b02682b6361694338215e61ea3c52bc5
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..622b548e3632a1beae519c9b829e98686521021f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c09465a3cf7c882d1dbf4278383635edd51f34787faa0a481504726d291b45b
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/fp32.pt b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..66850e59fe9ef4807d81b467e33eea9c19b08f97
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91833ba74db7145894188415c461ffae85e0fe3eaa271418a2dd78240f0e214f
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/step.pt b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..52a03818deefd1dab96b23072b524047ee90f944
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de46f4c0f98f1f78028befe6bcaa638306ea675c89e26275e36503c9db9139f4
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a0bbf8100a2b700a959ba9417307dcb6cf935a2f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f00b2cffaebf7736e036163d1c757f9c1cf18b2967e74b3319f91a826e8998cb
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..bf14a102781ddc915cfd5fb0ce903eb8444a1a0e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54c1a7406af3faa93df0b518b5ceecb4f4cbe1c42ac1abc8156bd66ca2c3c1eb
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/step.pt b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.post_attention_layernorm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a04556a5228e525efcc12999e7c1e8c8b9b3554f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2c17909d07a2b7b8238b6ea5a1520ef212b0ae78feee7f648ef961f15684d34
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..957382e13e3b151639015332b4e2a51fe36abc1a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76777366aaa9c4300061910295bf02731cb76b2fbb1a22038ece57636397b0c0
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1f7683993d70d244fe552cad231e14278c834614
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7dfaa7376f4606e5ce8992b55ffc58e5a6d71d2bf4e05bbf0b2025d7e150096
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7d88ca5984e545dae6de5e68cd4db834de8ef145
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5be242dd628c270fa664a9872b86619751cb8893455c56d962bfb2528fef69fc
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..77ba78165f367ed47ce65c6246dc76fad7568756
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1503151cb6226823eee1b7da50687797560527a099bc2b8ef1b73b7862844e12
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e878ecb98e25d0208bc1ccfd8a4edec2ceb29bfc
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92d039af8aeede1ff22d678399b62abc0a7939e926cac79e6169307810646a34
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb245e45451c859a20ad1d793f08385898e47052
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec1772c75637468bcc0cb4bb4c33dd2933cc647d62d3e6b9c66621a54f8d6977
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a4ee71604dd1b4211777eea08fb2c74c5dd9dd5f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9eb0750c72e95e4fa2f719856965f864932eb0b2662e104ecda2a4cb29d53001
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b90e90067a9fefc34942901e3fe3c20dce5dd98e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82d40ef6697feddf89dc4a7428fe99e0fd72b0482d53c64fd725c3e3c1cbc112
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.k_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f479f9e61a5389d42845dfbe5d8d0d5045d99590
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:950afceb1ea416aae56dc8b9d2774ffa6efb836c32325f572cd9050812e4d6c8
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..697f14b7ed60debab2d5e1223210df1fb122c49d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59549f4a82e939ba6152b587a23db60e0d20b8d822254192636ed79a9f8ba568
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..8532c7634f56c8c2a76f6ba0be6459519f39c6ec
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10f46256c857944b1bc827fda3ce91f85b197e9acb26be193a82dd6ad432db11
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..326f94f7d28272f84843bd4f2d2dc98fb31711ca
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abee98d8d553c5ac06631d36122a0c9b19bff48903c7a73b9a62516069c467f7
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ba76188300251da1ce85aed08a301b3c99ae33f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:424791583d98c369633d8184ff2cc6ba1dc4eea0f032d1c5aa3f282865929f42
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..05e52fc8163a84ec409621a24f32ea8b1e21759f
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a40e52312351783f879bed52ebcb698f751c9dce8581b7f1fadc31675aef2dc2
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.o_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..cb8e9be3af33769b4680880a7a1d5e7368c69582
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8cc048062806316c86b87ceb04b9161017797c01d7f9c77a83faee92f3531d0
+size 8860
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f1513402cee413ccaee5807e30da15083b563224
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f83c128ed9362716c13ccd1e360f2be44e8b399ed0a61a8a796687fa0dfb1645
+size 8875
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..98b2479e82c35bd599cdbe415fbe3a09faf5cd6c
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64313850231d24889b58b2c95d1362aa2231e34176b900ea4b5b7e3edcff304d
+size 8781
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f0d7fc3a6c54f0ffc2db7f6b2fd6050410e3fcb6
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c959a7dd1dd73cf6a1ecd6780ebe12e33744dae5d10cb2363f8d1b05e90a2bf1
+size 14746844
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3267c267dfc6c23949efeaec53d8ed9401dfbd16
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e92e28ebc9bae47fe92f534b96472f0a31ea6f08ba81d7cf9057cf6b18361c7e
+size 14746859
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..7bb3895f29281adafd9a0f6445576ce4d14e09e9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20a14f277cdc8bf7fbcec930febab21069c07f951a2a9183ae5cd6335c060c53
+size 14746765
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..66c1f8c0905f4b0cad243dd0b9307e478eb98b99
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:577d730b28192213a40a5b5beed78e5757bce9e67412dc78ea05dfe83dbf9aa2
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d2f0c4ce932273382c12b39df7af585fb9b75bab
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33ee493162587446cfabdec487a41344d43950d0555d740806c733f16fd0a1c3
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a0f8c19244663703b53687cffac0527573457d7b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e4f39661142be86f634f1cb71229ce36ee62dbb88cf365f6467948e92ded75d
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.q_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..59313662e9e30e65190791719384a011d063fd83
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37a49eb8580260db8f057bdc4480f39b7316e72efc639fdd878172f11551e7c6
+size 2716
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d02f039f57339aff702d5a15589056729eebc58e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:428666b72090ce6e245125780abf9db4fdbffb6a8058e668929fdf844d2e5a5f
+size 2731
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3fa3d7aeb31590bd8dc43901836e08226eebd65d
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58ae14a55c7cb50cab848ad897ed07be6e30aaf0e79fa4bcc31ca07ee40959f0
+size 2637
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.bias/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..47c90b67c744f1d85cfa884b428177a599e76192
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:552c4eea30799cc5ab8c4c63f395cafacb597a2adba948bc7f8ec29c99d73cad
+size 2950364
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..e5a50c6b1d6dd0c817cfa39e6929f8371c84030a
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17e57cc01d6e55ee580bd6b2d480bdefcb98086d2079b69cfc838a6ce615f486
+size 2950379
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..10611bbe105d12497c550df93287846047e4512e
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4acf6331626e6e9c7213ebe41800bdc728c5a3ea19fb37cab8a80c88c336671c
+size 2950285
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..2a11851ee184795e54a5360ec7c34e7a6ac9b7bf
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d65a53fb1b148a2fae7c4ecedcb3423f3ca9a730c7997d96694067dcb6f0618f
+size 1180
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..acf7dbd8feee73f35d551da3e1a38a215b0655ee
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fab266868cf02e1f9243e8c3641be0dff121a0b6e24548cd805d3713fb76f7b2
+size 1195
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/fp32.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a078e35b799185db154bf28c58ee131e12e671d9
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49b211e30f850ec3b29521b4665656fcc077814f5a003936a8fd041462440ec9
+size 1165
diff --git a/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/step.pt b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.layers.9.self_attn.v_proj_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.norm.weight/exp_avg.pt b/global_step262772_universal/zero/model.norm.weight/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..5a2376eb06a0ae0268397fe1a3a9c47c9a0f05ae
--- /dev/null
+++ b/global_step262772_universal/zero/model.norm.weight/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8090ab6523ed79eb162ffd787912ab6ad6c0ce015506dce3bb1fb5d84df352be
+size 8860
diff --git a/global_step262772_universal/zero/model.norm.weight/exp_avg_sq.pt b/global_step262772_universal/zero/model.norm.weight/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..6e24f238574c4a98a7eed93a387022450ba8a452
--- /dev/null
+++ b/global_step262772_universal/zero/model.norm.weight/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a945c5b9e744d0ba906402a9608f80669936bb4f831d2facc4f7826b8a852e0
+size 8875
diff --git a/global_step262772_universal/zero/model.norm.weight/fp32.pt b/global_step262772_universal/zero/model.norm.weight/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..a237cbe654b4d3e21a9a314b860b8f92ed3f4375
--- /dev/null
+++ b/global_step262772_universal/zero/model.norm.weight/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:536b1eb5812125dd6da86be1200ff1352f089c7a4867067eb133dce1432ce3c2
+size 8781
diff --git a/global_step262772_universal/zero/model.norm.weight/step.pt b/global_step262772_universal/zero/model.norm.weight/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.norm.weight/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/model.norm_alpha/exp_avg.pt b/global_step262772_universal/zero/model.norm_alpha/exp_avg.pt
new file mode 100644
index 0000000000000000000000000000000000000000..38fc650d69045782a0b3a5ba2852e7b1586fd499
--- /dev/null
+++ b/global_step262772_universal/zero/model.norm_alpha/exp_avg.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1034a13fe0624d5aacfce2c198ae8c4718d3a69bbbb972cf4f8e2dd438c9e0cb
+size 1180
diff --git a/global_step262772_universal/zero/model.norm_alpha/exp_avg_sq.pt b/global_step262772_universal/zero/model.norm_alpha/exp_avg_sq.pt
new file mode 100644
index 0000000000000000000000000000000000000000..b4648191fa8c5e291f143f5e6da9be54b97abffe
--- /dev/null
+++ b/global_step262772_universal/zero/model.norm_alpha/exp_avg_sq.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e04f8c1e8a5a835d469c0074c4fc5be2501acaecc9d2458a42030f9e03da40f
+size 1195
diff --git a/global_step262772_universal/zero/model.norm_alpha/fp32.pt b/global_step262772_universal/zero/model.norm_alpha/fp32.pt
new file mode 100644
index 0000000000000000000000000000000000000000..85e3f2be7153ed0dceb2979f3528b98894cc1261
--- /dev/null
+++ b/global_step262772_universal/zero/model.norm_alpha/fp32.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05afb2df525ba7023e756a09b949152ba9f460a318f236564784d92f5572e203
+size 1165
diff --git a/global_step262772_universal/zero/model.norm_alpha/step.pt b/global_step262772_universal/zero/model.norm_alpha/step.pt
new file mode 100644
index 0000000000000000000000000000000000000000..4e7472f664740164a9380436b9b58b32ddad2d6b
--- /dev/null
+++ b/global_step262772_universal/zero/model.norm_alpha/step.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
+size 852
diff --git a/global_step262772_universal/zero/optimizer_state.pt b/global_step262772_universal/zero/optimizer_state.pt
new file mode 100644
index 0000000000000000000000000000000000000000..3ce72c43bca2039b83b2a6be367483908733c93d
--- /dev/null
+++ b/global_step262772_universal/zero/optimizer_state.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5713674b05380123c9cd12704e1e442d4450372232da80e0c750dc1292a718b
+size 1664
diff --git a/latest_universal b/latest_universal
new file mode 100644
index 0000000000000000000000000000000000000000..5ba2c5c954286abdd3a767f33ca1823a1f0788bb
--- /dev/null
+++ b/latest_universal
@@ -0,0 +1 @@
+global_step262772_universal
\ No newline at end of file
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4f48b06e315d1fbf7e15ea6b70bb73346b6c8f00
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ec3e8a11737b11e832b6a2e2a35bed1ebb73d4f6de5dec60f4127ee27b771d0
+size 4848661852
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..a782b2f1cdab4d0bacb2dc0f85d02c4b1e31f0bd
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,30 @@
+{
+ "bos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..efc4b7ce3fdffa249828bf0f3fe2256cae304a3f
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,226306 @@
+{
+ "version": "1.0",
+ "truncation": null,
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 102,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 103,
+ "content": "",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 104,
+ "content": "<|start_header_id|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 105,
+ "content": "<|end_header_id|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 106,
+ "content": "<|eot_id|>",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": {
+ "type": "Sequence",
+ "normalizers": [
+ {
+ "type": "Prepend",
+ "prepend": "▁"
+ },
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": " "
+ },
+ "content": "▁"
+ },
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": " "
+ },
+ "content": "▁"
+ }
+ ]
+ },
+ "pre_tokenizer": null,
+ "post_processor": {
+ "type": "TemplateProcessing",
+ "single": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ }
+ ],
+ "pair": [
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 0
+ }
+ },
+ {
+ "Sequence": {
+ "id": "A",
+ "type_id": 0
+ }
+ },
+ {
+ "SpecialToken": {
+ "id": "",
+ "type_id": 1
+ }
+ },
+ {
+ "Sequence": {
+ "id": "B",
+ "type_id": 1
+ }
+ }
+ ],
+ "special_tokens": {
+ "": {
+ "id": "",
+ "ids": [
+ 1
+ ],
+ "tokens": [
+ ""
+ ]
+ }
+ }
+ },
+ "decoder": {
+ "type": "Sequence",
+ "decoders": [
+ {
+ "type": "Replace",
+ "pattern": {
+ "String": "▁"
+ },
+ "content": " "
+ },
+ {
+ "type": "ByteFallback"
+ },
+ {
+ "type": "Fuse"
+ },
+ {
+ "type": "Strip",
+ "content": " ",
+ "start": 1,
+ "stop": 0
+ }
+ ]
+ },
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": "",
+ "continuing_subword_prefix": null,
+ "end_of_word_suffix": null,
+ "fuse_unk": true,
+ "byte_fallback": true,
+ "ignore_merges": false,
+ "vocab": {
+ "": 0,
+ "": 1,
+ "": 2,
+ "": 3,
+ "": 4,
+ "\n": 5,
+ "\t": 6,
+ "": 7,
+ "": 8,
+ "": 9,
+ "": 10,
+ "": 11,
+ "
": 12,
+ "": 13,
+ " | | ": 14,
+ "": 15,
+ "": 16,
+ "": 17,
+ "": 18,
+ "": 21,
+ "": 22,
+ "
": 23,
+ "": 24,
+ "": 25,
+ "": 26,
+ "": 27,
+ "": 28,
+ "": 29,
+ "": 30,
+ "": 31,
+ "": 32,
+ "
": 33,
+ "
": 34,
+ "
": 35,
+ "": 36,
+ "": 37,
+ "": 38,
+ "
": 39,
+ "": 40,
+ "": 41,
+ "
": 42,
+ "": 43,
+ "
": 44,
+ "
": 45,
+ "": 46,
+ "": 47,
+ "
": 48,
+ "": 49,
+ "": 50,
+ "": 51,
+ "0": 52,
+ "1": 53,
+ "2": 54,
+ "3": 55,
+ "4": 56,
+ "5": 57,
+ "6": 58,
+ "7": 59,
+ "8": 60,
+ "9": 61,
+ "+": 62,
+ "-": 63,
+ "=": 64,
+ ",": 65,
+ "。": 66,
+ "!": 67,
+ "?": 68,
+ "、": 69,
+ ":": 70,
+ "¥": 71,
+ ".": 72,
+ "!": 73,
+ "?": 74,
+ "...": 75,
+ "。。。": 76,
+ "。。。。。。": 77,
+ "《": 78,
+ "》": 79,
+ "【": 80,
+ "】": 81,
+ "『": 82,
+ "』": 83,
+ "```": 84,
+ "": 86,
+ "---": 87,
+ "": 88,
+ ";": 89,
+ ".": 90,
+ "=": 91,
+ "<": 92,
+ ">": 93,
+ "-": 94,
+ "+": 95,
+ "%": 96,
+ "‼": 97,
+ "㊣": 98,
+ "/": 99,
+ "|": 100,
+ "": 101,
+ "": 102,
+ "": 103,
+ "<|start_header_id|>": 104,
+ "<|end_header_id|>": 105,
+ "<|eot_id|>": 106,
+ "": 107,
+ "": 108,
+ "": 109,
+ "": 110,
+ "": 111,
+ "": 112,
+ "": 113,
+ "": 114,
+ "": 115,
+ "": 116,
+ "": 117,
+ "": 118,
+ "": 119,
+ "": 120,
+ "": 121,
+ "": 122,
+ "": 123,
+ "": 124,
+ "": 125,
+ "": 126,
+ "": 127,
+ "": 128,
+ "": 129,
+ "": 130,
+ "": 131,
+ "": 132,
+ "": 133,
+ "": 134,
+ "": 135,
+ "": 136,
+ "": 137,
+ "": 138,
+ "": 139,
+ "": 140,
+ "": 141,
+ "": 142,
+ "": 143,
+ "": 144,
+ "": 145,
+ "": 146,
+ "": 147,
+ "": 148,
+ "": 149,
+ "": 150,
+ "": 151,
+ "": 152,
+ "": 153,
+ "": 154,
+ "": 155,
+ "": 156,
+ "": 157,
+ "": 158,
+ "": 159,
+ "": 160,
+ "": 161,
+ "": 162,
+ "": 163,
+ "": 164,
+ "": 165,
+ "": 166,
+ "": 167,
+ "": 168,
+ "": 169,
+ "": 170,
+ "": 171,
+ "": 172,
+ "": 173,
+ "": 174,
+ "": 175,
+ "": 176,
+ "": 177,
+ "": 178,
+ "": 179,
+ "": 180,
+ "": 181,
+ "": 182,
+ "": 183,
+ "": 184,
+ "": 185,
+ "": 186,
+ "": 187,
+ "": 188,
+ "": 189,
+ "": 190,
+ "": 191,
+ "": 192,
+ "": 193,
+ "": 194,
+ "": 195,
+ "": 196,
+ "": 197,
+ "": 198,
+ "": 199,
+ "": 200,
+ "": 201,
+ "": 202,
+ "": 203,
+ "": 204,
+ "": 205,
+ "": 206,
+ "": 207,
+ "": 208,
+ "": 209,
+ "": 210,
+ "": 211,
+ "": 212,
+ "": 213,
+ "": 214,
+ "": 215,
+ "": 216,
+ "": 217,
+ "": 218,
+ "": 219,
+ "": 220,
+ "": 221,
+ "": 222,
+ "": 223,
+ "": 224,
+ "": 225,
+ "": 226,
+ "": 227,
+ "": 228,
+ "": 229,
+ "": 230,
+ "": 231,
+ "": 232,
+ "": 233,
+ "": 234,
+ "": 235,
+ "": 236,
+ "": 237,
+ "": 238,
+ "": 239,
+ "": 240,
+ "": 241,
+ "": 242,
+ "": 243,
+ "": 244,
+ "": 245,
+ "": 246,
+ "": 247,
+ "": 248,
+ "": 249,
+ "": 250,
+ "": 251,
+ "": 252,
+ "": 253,
+ "": 254,
+ "": 255,
+ "": 256,
+ "": 257,
+ "": 258,
+ "": 259,
+ "": 260,
+ "": 261,
+ "": 262,
+ "": 263,
+ "": 264,
+ "": 265,
+ "": 266,
+ "": 267,
+ "": 268,
+ "": 269,
+ "": 270,
+ "": 271,
+ "": 272,
+ "": 273,
+ "": 274,
+ "": 275,
+ "": 276,
+ "": 277,
+ "": 278,
+ "": 279,
+ "": 280,
+ "": 281,
+ "": 282,
+ "": 283,
+ "": 284,
+ "": 285,
+ "": 286,
+ "": 287,
+ "": 288,
+ "": 289,
+ "": 290,
+ "": 291,
+ "": 292,
+ "": 293,
+ "": 294,
+ "": 295,
+ "": 296,
+ "": 297,
+ "": 298,
+ "": 299,
+ "": 300,
+ "": 301,
+ "": 302,
+ "": 303,
+ "": 304,
+ "": 305,
+ "": 306,
+ "": 307,
+ "": 308,
+ "": 309,
+ "": 310,
+ "": 311,
+ "": 312,
+ "": 313,
+ "": 314,
+ "": 315,
+ "": 316,
+ "": 317,
+ "": 318,
+ "": 319,
+ "": 320,
+ "": 321,
+ "": 322,
+ "": 323,
+ "": 324,
+ "": 325,
+ "": 326,
+ "": 327,
+ "": 328,
+ "": 329,
+ "": 330,
+ "": 331,
+ "": 332,
+ "": 333,
+ "": 334,
+ "": 335,
+ "": 336,
+ "": 337,
+ "": 338,
+ "": 339,
+ "": 340,
+ "": 341,
+ "": 342,
+ "": 343,
+ "": 344,
+ "": 345,
+ "": 346,
+ "": 347,
+ "": 348,
+ "": 349,
+ "": 350,
+ "": 351,
+ "": 352,
+ "": 353,
+ "": 354,
+ "": 355,
+ "": 356,
+ "