IvanHU committed on
Commit
85d4dac
·
1 Parent(s): ad5373e

Upload deepspeed checkpoint

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. config.json +69 -0
  3. global_step262772_universal/mp_rank_00_model_states.pt +3 -0
  4. global_step262772_universal/zero/lm_head_alpha/exp_avg.pt +3 -0
  5. global_step262772_universal/zero/lm_head_alpha/exp_avg_sq.pt +3 -0
  6. global_step262772_universal/zero/lm_head_alpha/fp32.pt +3 -0
  7. global_step262772_universal/zero/lm_head_alpha/step.pt +3 -0
  8. global_step262772_universal/zero/model.embed_tokens.weight/exp_avg.pt +3 -0
  9. global_step262772_universal/zero/model.embed_tokens.weight/exp_avg_sq.pt +3 -0
  10. global_step262772_universal/zero/model.embed_tokens.weight/fp32.pt +3 -0
  11. global_step262772_universal/zero/model.embed_tokens.weight/step.pt +3 -0
  12. global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg.pt +3 -0
  13. global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg_sq.pt +3 -0
  14. global_step262772_universal/zero/model.layers.0.down_proj_alpha/fp32.pt +3 -0
  15. global_step262772_universal/zero/model.layers.0.down_proj_alpha/step.pt +3 -0
  16. global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg.pt +3 -0
  17. global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg_sq.pt +3 -0
  18. global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/fp32.pt +3 -0
  19. global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/step.pt +3 -0
  20. global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg.pt +3 -0
  21. global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg_sq.pt +3 -0
  22. global_step262772_universal/zero/model.layers.0.input_layernorm.weight/fp32.pt +3 -0
  23. global_step262772_universal/zero/model.layers.0.input_layernorm.weight/step.pt +3 -0
  24. global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg.pt +3 -0
  25. global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg_sq.pt +3 -0
  26. global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/fp32.pt +3 -0
  27. global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/step.pt +3 -0
  28. global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg.pt +3 -0
  29. global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg_sq.pt +3 -0
  30. global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/fp32.pt +3 -0
  31. global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/step.pt +3 -0
  32. global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg.pt +3 -0
  33. global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg_sq.pt +3 -0
  34. global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/fp32.pt +3 -0
  35. global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/step.pt +3 -0
  36. global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg.pt +3 -0
  37. global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg_sq.pt +3 -0
  38. global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/fp32.pt +3 -0
  39. global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/step.pt +3 -0
  40. global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg.pt +3 -0
  41. global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg_sq.pt +3 -0
  42. global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/fp32.pt +3 -0
  43. global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/step.pt +3 -0
  44. global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg.pt +3 -0
  45. global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg_sq.pt +3 -0
  46. global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/fp32.pt +3 -0
  47. global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/step.pt +3 -0
  48. global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg.pt +3 -0
  49. global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg_sq.pt +3 -0
  50. global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/fp32.pt +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ global_step262772_universal/ filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "YuLanMiniForCausalLM"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_yulanmini.YuLanMiniConfig",
7
+ "AutoModel": "modeling_yulanmini.YuLanMiniModel",
8
+ "AutoModelForCausalLM": "modeling_yulanmini.YuLanMiniForCausalLM"
9
+ },
10
+ "attention_bias": true,
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "dim_model_base": 1920,
14
+ "dim_model_base_attn": 64,
15
+ "dim_model_base_init": null,
16
+ "dim_model_base_lmh": 1,
17
+ "dim_model_base_logits": 1920.0,
18
+ "dim_model_base_lr": 256.0,
19
+ "down_proj_alpha": 0.03450327796711771,
20
+ "embed_tokens_alpha": 1,
21
+ "embedding_ln": false,
22
+ "embedding_rmsln": false,
23
+ "eos_token_id": 2,
24
+ "gate_up_proj_alpha": 0.3651483716701107,
25
+ "gradient_checkpointing_step": 56,
26
+ "hidden_act": "silu",
27
+ "hidden_size": 1920,
28
+ "hidden_states_shrink": 0.18708286933869706,
29
+ "init_scale_o": 1,
30
+ "initializer_range": 5e-05,
31
+ "input_layernorm_alpha": 1.0,
32
+ "intermediate_size": 4800,
33
+ "k_proj_alpha": 0.3651483716701107,
34
+ "layer_norm_eps": 1e-06,
35
+ "lm_head_alpha": 1.0,
36
+ "ln_scale": 1,
37
+ "max_position_embeddings": 28723,
38
+ "model_reproduce": "transformer",
39
+ "model_type": "yulanmini",
40
+ "norm_alpha": 1.0,
41
+ "num_attention_heads": 30,
42
+ "num_epochs_trained_before_this_epoch": 26,
43
+ "num_hidden_layers": 56,
44
+ "num_key_value_heads": 6,
45
+ "num_steps_trained_before_this_epoch": 253006,
46
+ "o_proj_alpha": 0.03450327796711771,
47
+ "post_attention_layernorm_alpha": 1.0,
48
+ "q_proj_alpha": 0.3651483716701107,
49
+ "qk_layernorm": false,
50
+ "rms_norm_eps": 1e-06,
51
+ "rms_type": "llama",
52
+ "rope_scaling": null,
53
+ "rope_theta": 490000.0,
54
+ "scale_emb": 10.0,
55
+ "shrink_alpha": 1,
56
+ "sliding_window": null,
57
+ "tie_word_embeddings": true,
58
+ "torch_dtype": "bfloat16",
59
+ "transformers_version": "4.44.0",
60
+ "use_cache": true,
61
+ "use_emb_alpha": true,
62
+ "use_liger": true,
63
+ "use_norm_alpha": true,
64
+ "use_sliding_window": false,
65
+ "v_proj_alpha": 0.3651483716701107,
66
+ "vocab_size": 99000,
67
+ "wesar_weights": true,
68
+ "z_loss": 0.0001
69
+ }
global_step262772_universal/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:068960e69bea17a9db7d28394f5d4188548e03b6123f2523d3306b4ea7453d3a
3
+ size 4468641200
global_step262772_universal/zero/lm_head_alpha/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24d1e97f32ee6a14d3b980485c33afd6eff0be75132a5e6c4420616ff70ba33a
3
+ size 1180
global_step262772_universal/zero/lm_head_alpha/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:488a84a8d79d80262280c382dc3084ae6a17c283073d19c6b8f3624a7e30504a
3
+ size 1195
global_step262772_universal/zero/lm_head_alpha/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:920a12ef25b001861b5826242427f2a5a12cd1ab5c0994646ac3a00744359739
3
+ size 1165
global_step262772_universal/zero/lm_head_alpha/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.embed_tokens.weight/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:421f96fa124ae49c657d5be3c502391f9e3cb8147eed8c191851902a96d22b5e
3
+ size 760321244
global_step262772_universal/zero/model.embed_tokens.weight/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2bf52fee54eac12bad80a185c257169bdfc0d128b378c42d44f9161ea3ea7f8
3
+ size 760321259
global_step262772_universal/zero/model.embed_tokens.weight/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72496ea0b76d71296d456c776e28a2614bee98cb7e37c9016aa7be2c0518d440
3
+ size 760321165
global_step262772_universal/zero/model.embed_tokens.weight/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eec0ee5600fa9b05480124179b95824614d6dd26ad3fb6caadf510d2e6969608
3
+ size 1180
global_step262772_universal/zero/model.layers.0.down_proj_alpha/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dbb3f87cb27b5f6ff2ee19db88cb77df6116edefdb304413066049db1e2d1ce
3
+ size 1195
global_step262772_universal/zero/model.layers.0.down_proj_alpha/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9557f20dc5e6ff6c7b550489769bd9682302a2573064285af046614557a5c642
3
+ size 1165
global_step262772_universal/zero/model.layers.0.down_proj_alpha/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8de139b0406580e783c6c0734de73ae7a99f77e76981a4271107a26a9fb37e2
3
+ size 1180
global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a7b0967aff9b563dee22481fafdfa316d0b84101c6a07aad6c3542643f6df04
3
+ size 1195
global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6dcfec001a806aa802f80e13df9a12f8fb938d0d95356ff766172d117efd756
3
+ size 1165
global_step262772_universal/zero/model.layers.0.gate_up_proj_alpha/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b633dfc682f32f3af5721f1e3e0f74de925ed6bf41e108347972271ef13f2e9b
3
+ size 8860
global_step262772_universal/zero/model.layers.0.input_layernorm.weight/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7413a968724e9d33504e0c55dffaf905eea10453636f2653b0d14f7cb59c16f
3
+ size 8875
global_step262772_universal/zero/model.layers.0.input_layernorm.weight/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0edcc90e8d3e55908e1568e75c4a61a180dc276a963dbff07eae0e77f71f1a96
3
+ size 8781
global_step262772_universal/zero/model.layers.0.input_layernorm.weight/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a41d5db3d8cacbd3dbb42afedfcc7907df0a4c91ba4e95e833bdd6e961281406
3
+ size 1180
global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c67b9840fd22c47fc0db75fb4070cc7c0df66a45a6dc834942d25b202a6df1cb
3
+ size 1195
global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd4e0b75f2f2fdaaf690fb830f58bf40df16b01b8c8dbba760f95a74d3fb5b45
3
+ size 1165
global_step262772_universal/zero/model.layers.0.input_layernorm_alpha/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a4083737526eb251d4607ad0d638422fff8e0e1e7f86308d6c41784ad0d211c
3
+ size 36865244
global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ead5e3dc0d98cfe7817fed158c0ca54504c6cc1149677627b2c39831b673c7ee
3
+ size 36865259
global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f6aa9574d69070bb7ecb6ef7b0e8858090bc10c08aec506e4f3112e21144bc4
3
+ size 36865165
global_step262772_universal/zero/model.layers.0.mlp.down_proj.weight/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96f6eb1b2f763978c092f284f62204be74c428b1a6c18290ba899d2c0623df16
3
+ size 36865244
global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d484c7d9b0de884d18649bb0a1f335884b9242d4292d5ba430056f913414a4b1
3
+ size 36865259
global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4bb6e12bbc1413cda9cc0951e1f91dc995cc85593c668a6f03acaa9e58319d6
3
+ size 36865165
global_step262772_universal/zero/model.layers.0.mlp.gate_proj.weight/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3c6617a041f3748e8181289f24ceff5f2cc982d43fd975da2c8366b91bf3651
3
+ size 36865244
global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f297e1d3bbc8ea0c21fc1b11b90970681b704e3b662b99473cf859c1884133ef
3
+ size 36865259
global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e20916aea6240cd434aa26da48df675e4f4ba6b59d9a067e4a2d2a8fd56bdd48
3
+ size 36865165
global_step262772_universal/zero/model.layers.0.mlp.up_proj.weight/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c1be7eadf6d9dc033847ee913ade2676ff54832e06de6640078ca66c7450a6d
3
+ size 8860
global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8783d4267134189b045a6bf632c72918e7774fdde3dbe0a4af6b64f4966e28d6
3
+ size 8875
global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14dd67d7ce0a219a1514cbf1e711ce7454a70f808e9eeafb5b266e5c26a2eb0b
3
+ size 8781
global_step262772_universal/zero/model.layers.0.post_attention_layernorm.weight/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d428b0db470fb2efd3ce6dff50c56de02f6d82c00c930714f7bd4ac34e5c5df
3
+ size 1180
global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55c91a7433fefb18434ce776e6dddc567b4b258b026ed78286a6ad87f8a06080
3
+ size 1195
global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f815161135baf75aa6a0cac8111d03984316e61209efd4ec2100cdc6a8a66a8
3
+ size 1165
global_step262772_universal/zero/model.layers.0.post_attention_layernorm_alpha/step.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31c7b9f13cc6e441f30fe561a052ed7fe853d93f8c150906b9656a8040e2ae39
3
+ size 852
global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9c337d7fe1cfa0f72e2f62ef43cbd3c435a3c88c021608bb2d18d7e1aaacd57
3
+ size 2716
global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg_sq.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bdfe78e0197b464163d167b81c4d61ebe17878506184d7ef06442370fcbf185
3
+ size 2731
global_step262772_universal/zero/model.layers.0.self_attn.k_proj.bias/fp32.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c026e2229dedc3ec003318235c3e237568a65105983663ef2eda4cf610e37ee4
3
+ size 2637