diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/latest b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/latest new file mode 100644 index 0000000000000000000000000000000000000000..7b2c8602be034ae63f23b293f8d037fa7afa0c54 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/latest @@ -0,0 +1 @@ +pytorch_model \ No newline at end of file diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d023ce6f1a9554d87e519cbfd297df6cad19534e --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98254f848a924e279982c14d5a1241a3798fe9c12c0cf0f74487053a36598200 +size 106611632 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f9e7cbced71c8344f306b0b38a264df96bdbb06 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_100_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6d8ef72ede544384b0c1bcbcf972ae8a6b4a1210fd90a0ce19852df75ba915 +size 106611968 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a758d0b30dacf27e5cac67181d4e89a312845450 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_101_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:185570b578bc4113e5e1cffb0c12f9b55bfa28b01c651c13c9ee48ee2fb51fa1 +size 106611584 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7579e435028b94f340297596750a2654ca84d197 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_102_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9c1542e870eb8e722463eb9a8de930f9b55630fec9e591cc187119dcf75d1bd +size 106611456 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..219d75ec5aac327c7dfde18c23d5456c9ff1c841 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_103_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87552a885a7acedaab8373fee97d46ac8cdc0975cbc6e9e31b8abef299a55f1f +size 106612224 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..37d1ccb4eea2704e5d80c10ac67bb20f490566c6 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_104_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93b4ebf9d3193bed0617b9fd525855ed16ba90f782c1061df04e62bd85888d47 +size 106611776 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..96d31342b0723dbc817a0db972b78fc407e45474 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_105_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:524daa245a4e9abeaf428ecb6c872de2931249a7405689bf5c7625c15911446f +size 106611584 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..82fc0c2168e1cda107e0c6c7f582c236b4afa466 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_106_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44fd88fc063856072b042554730a90c438046dd24c7913a107e070fa0c2cb03e +size 106612224 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa9d3a107b420f2110843153de413e907f000dc9 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_107_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9d6e4af4ec68844e58a94d3c2a1f512832069e32f0fef5c70fafc9701b74c6 +size 106611776 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..417b3a1649791d84b1976fe69479220a93a542e0 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_108_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5cb6e1b4e9dc19d70a7676caac83630806ac2dad2360b1eafb51be824b4916 +size 106611456 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ff0bee245984ebe6719fb49d87fc4fd40fb5172 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_109_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:337360033e803e272157e513144defe1c35dd82f7602140f0d703833e2c0ea72 +size 106611456 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..653d1537c86307f32dd100756b1a6fdfad28261e --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28851f1c00f5c987ccd8cc3ac5da31135fe273163102761d036aa72edfba1477 +size 106611960 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..922b9586b67cf4c768fc8217bb443c2f91d0cb37 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_110_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a6f35b617d5a5b6e048805dbba372007c37b3d85fc13ae50349e391a0e14f09 +size 106612224 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..79bd75d8b9dd59a33591fb11e23ea96f3a57e279 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_111_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803429b1f079b8424678bc152187126d7af24b75b58036e5db25ccfea4ec20b2 +size 106611776 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e46fb8785bc8e536a8fbb9f6cb3c227429023971 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_112_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3353fa83aae8ae829f69f54aa16e029451208d8b02dd451522e59c16089600f0 +size 106611584 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bd0702e74cbb8f51d50008c57657f785db1408d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_113_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02e058f80e2928ad2cdbfa4cf7cbdaae96f033f5dc7765b425160785788cd004 +size 106612224 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..550e088ead168caf97546904b3ff46fc60fe7c1d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_114_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1808b9a8e0b12ee19bd3694a82c1b9694b4ec9c861dd70634d66ba72e063d0e +size 106611776 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c16e0338bc03fa64d7b8e781bd31e7b4448c98d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_115_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8121fcd2af01ba1c89b300b7c57d897e0d52567ab361f50a1756c79eba624812 +size 106611456 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d86d4d6e32eac26798e094e7345c82bfe5e94b2b --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_116_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6e224e9b51246c95819f20f60b91458a191e550a23be841135f33b97d087db +size 106611712 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b27d7096e1d60c13a8dae1f2ce439748d13f496 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_117_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d4553f8f173efee0e45138861209a75b2b181e735cd64ea8ec711bc7e7d573b +size 106612288 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..affef15f8cf807b937027c1c37896493e9202ec7 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_118_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83da3f2fe1aee28ccc9a68f6038844a70d9eebb41664eaea3645df0079796084 +size 106611584 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..106f0740409afdaf8e65e264b6aa6e798ef366c5 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_119_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:042857d7951dbbc2941fb9f183fb8e936586cceece72fc25b50a4d511382984b +size 106611456 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc17a100a047ff553aa19bf36cd2b17cf3777eac --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36267623c1420b8f15ac8ca09dd4966fc34454c1ff1f5dcad7c06b3c95a1adb2 +size 106611960 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b607b1f92783425df89a220d71a2a88fc41ebc86 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_120_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30e9deb920ae9e9cbca4551f1eaf7669bafea2ce98882cb4d16a3c42971dcb80 +size 106612224 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9c130c26a67d98022ef56014380f7c85a59b20d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_121_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f5d4368febb63f41127e700f277cd556d1d417057acbdf7327083cf9aa5af90 +size 106611776 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a8fce90c033f1e036e9f4604707adcfbe67dc2d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_122_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:217ce02818bc3bbb64553c9ad469047676f2f03ac699fb7347c2a75bd16fd8e4 +size 106611456 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3159548b118c4bbc1b2dd6879ac0370a489a9d5d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_123_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27b32b7488b7a68c5d6a36c9b415639ce17936a68315ef2e50c704981cf3c453 +size 106612032 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b5da437c0a6f31a7bdbc1220fdee1e0ad1892ed --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_124_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb2df97b80a7215a92f72dc7e2ad02af4f59184b3641e8d14fd094974a9db7f8 +size 106611968 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..55540b27a07720e37ef89a70f152b1ce98581cfe --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_125_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:496e06b51c8c905fbbec8ea695620ab7ec36a98fa8e5f2011f6bdebc9eecb70e +size 106611584 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6871b3b1fe7796e8dc56c2424249c5dd46b7c72a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_126_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f525aa25b166144efd0f01ceea32c946c5ae476bdffe88e67e3430cce9e5958 +size 106611456 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d57be2afe73ca08c0af9cc1ac7cc50f07bd8488 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_127_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f00c94acd3a080d3d8be34f5f55474f8d18fe8c41a222bd4bc8e8227f448d7f +size 106612288 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ba5c45d2134fdb93838b321808c4c0830e7cb44 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f32f335014640bf86bb3cdec07d7bedb808647e7b262127298d002e04b00213 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b381e60d1e0396de6b0098aac1b0df4055132815 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e93c0821d3586c2e936294c91feb74badfe8f08acbab71e2b960765747a5f03 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6aa4a07d7568a8c9569527de0c83badb3c877d0 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826c765acc2325509d95474e41d34c5c99e5f2caf2a10e2d92acb568f62c3e78 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..81c5c8b75e3cbce441bd1d003263f0a8d1f9d8cd --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b76371bd90a8d37fd6bfc2d63a2be5baaabfcdb01779c9f6e608baf78dbb3c1 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..347ad3360ded07fabd0f4646ba8f5452fd7da231 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_16_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9456f4504c9c98f52b06ddcb76d36edcb93236c6cf2cc0c27195d304bec71362 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..55e0b5615900c479a177b51aacb308f31ec3ef86 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_17_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c80841424cf28a932dc32786149257877f86b4c2e4d363e3d7a06543bd9e3c83 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb3263ad951975e141543730296f0d298ca7b439 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_18_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8da422be93993f27588bbcdb4d7122213eae16af37a8fcda9ffdf1c7a0a87b56 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3296fc39541e8bc80c05d8c3c0ea262483ddd79 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_19_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1ac7c2f1a0c36437a4783b7747e9ccdfd17103255e6a0eca0c097da378a21f +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e68d897f996efaa6c28abe4df1e80f0fe2ba546 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e102b70cb117a42fe044c9bf92b83222be9ee70ccdd5e22418f4f320a7988d5e +size 106611440 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1f80ad8988397fc55464d39d252897c259ba847 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_20_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0081898f42f5da24484ab18b2c7285d639c147b13f1e274b89e14cdb417142ac +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a3f0403e1cc4c20000d9993c43ec0c36ac81c6a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_21_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42da05ed9fab32d27dd78120fac3e31a3ffdabea86a8fbf3608de962f5d02169 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..67c2b5df2a172e5dfca6ebef2d745858041fbe2d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_22_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b54426359fe8b772eedd8569e20acd8281966c278b0ca7aa25ac1d8951dbde28 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..24259a4779be2a32a1398d7d8facbf0008bba43c --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_23_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb51127b0f1851e61644cd105dc724c68f5c367393065ee1b7545f91452b1670 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..593936985de224075848966b737d1e86e57f5ae9 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_24_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a14405d113ea3a34097f53ff3204583e1971caa774d06e5c00754faca5524cbe +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c2405cb00cd54cb1e07025e59afe458f3862527 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_25_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da4b6a06f27fa9bc3a135d85069971c8311dafbd758913b13aab8d7cb0e6ca70 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..300622ed80eee1cf14f7b670951167e8d4c94e16 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_26_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08314edb4b9238d1d4974623a1ba06cc5fb77a1486ef2b0e296f7eb28aa1dc38 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b07b698c0c13614a607017932262a2a7f660f188 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_27_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0c352e0116b00e9e64c1d3a20b997ba08c9df4d21dfb5660b9864371e03b6e +size 106611704 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b61062436db7514dcd12851445243eefb6b4fb2 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_28_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2059f857c2bc7f180a35c95339bcb5513433a20dd6fa175395da95cbea1fdeb3 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..99040262acb4d18a6c10cd5a9b6287f9ab8acb8a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_29_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e5ef736f982562d3b566955842c19235e3fb3492dd759d20744769292a0b1df +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6bf8d6bcabe7d3471837a22fa5441a692385b08 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8c3b8341cebc12fa5a0b27a2da3265076b2656c218e9701aadbaeb607d3ea24 +size 106611376 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b42e0ffe8812df07a9fbd08fc45591d4c78a5c2f --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_30_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f844912fae7770a1f71b720749b2f1ae9c03c1f4c650f0dd103bc7b24af3a48b +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdd923e3f7901e274433b14550509b1a13e1c532 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_31_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45be4f3a0a910d425578cd3fd04ebdfb03d536d2da4f40e9a6355f677c80a908 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..59c50611fdd06585afbc10a01cbd184ef16bd693 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_32_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5f99329d8ad05dc23a4c863053bd4ecec02e3ec6ecf4a40dc43544dab762a7e +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4a2fdb1b9c5b26a98fd83e82656a4ec6dfe8e66 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_33_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802c0ec33ed948086e72c56a9496bdb1a1bfb32397254708e54733007d0f5e6d +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..56403077b1715f8aaf1c6939bae3fedc61929ddc --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_34_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f53887be049f11b1993ba2164f0629600815342b36b49a1a091ee37a265abb +size 106611960 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6024561178a0be31113201c4191e594dd055a5ee --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_35_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcdb9a56f8239bb728976b1d23acda4e4bac81801deaaa63846376b2d4cc8827 +size 106611960 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b212bcb19f306e8f29c4605755b72752f4684a04 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_36_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:525b553abd90cd74e68f55f3c56433b1dd114e45e15c237e8e84110b3e591f83 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad05d52c3c3648d1adf877c5855a1dc3bd04cfcb --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_37_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be84655bb2131ce3d22e4f5bcc9fe77dc166be1b6a76fbc19db6662400e61ab8 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..220bc8dfba12f6fb2514f81126f61ffa88c42f30 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_38_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4edaa10cd5fcfc8ca98b1311afe7e0da0b0b754f0dfbfea7b1eb42d50e94dfbb +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8316092373b25d7b174c274b270b6f52cbc40c68 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_39_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:300df58c20d73d76cec563b323c67b67e788cdc89bb0a296e25d0b0fa3f5d84b +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..688f44bc5195e750f15d255bbd907875d286c82a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07aad23792b8ac0a2cd068d638c8ddd288958d90d50f67934c99add594702d5e +size 106611376 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a1bb19731ef5fa0df9c7f612edd54fd540a5707 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_40_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:621a7ad6132667513d185e4c9398e88f27cc57d4807d6c5b205845b859056571 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f06817d44188ce1f8ad277c586a2123ae6958665 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_41_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63a5ddb4d29042149a3203a5a2f1d918d6ffb006be3b3bbcd5980dfe551f361d +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8f2558102f9e0e73dc66290fe8e6e838bb14413 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_42_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e65804dd5f59bea3c2e5248aed80f2e71d538f91f42beebbe44c5740a83544 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..44609efd563a379e203acfc08610157e41060bc9 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_43_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cc73b3dfd782993b35fd4992115ff7fe1750d8f24a1fd5d408c89e28585657f +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..efa5870428da64b51794a794ceeeeec79b7510d2 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_44_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a062802b80c28f6b1ce3da7cfe14ffc47f9d2a8e3afe69de3f6403c9bacbc12 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5af5e0a6ef3c672bd11c903686ce06e8ea51c89 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_45_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b92c50c1585ee42f904015aeaa2ab61ab0c8b28f3760b80263826bd8eef2e6a +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..115ad06b2e33d0029275d25c002a00ab75261129 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_46_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e900f7bf404b52773ad5bfcf51e01eb8d28f5a8d06464b88678470e868a756 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad6477a6f2dd29923b93162942e6327e913db3d2 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_47_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6a367c9754639ab520025ccc2f35ad0a725c49a963563fc2f61f5e4c88a31c3 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..be943da376a2ba795b7b41fb0fcdf491f6e6baf6 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_48_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff3cc3fec6d20f0acabdb6f569206cd97bbed035bedcd6a6d374c915fd812ae +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dc15e6c5357d11954fe8bc26d2e1f1be6a06136 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_49_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ba3d49cfcddfd792eb7d30d61e79d52c82ca34bd8cd02cd44298e5cc65ff891 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e9cff1e530b7af6eef2fd64fdf848aff6179072 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61198540c82c11dbfb75028203963f784cd433c31a1e9c8f999c2139c0c11137 +size 106611696 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e04159799d4cda62c8d0d96f7d7526eadaa377e4 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_50_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbe8f58ca4d749bf1a18920cea5aa73462ce7a0878e0192087aa246c4d9c3798 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1fd1232e85d2743f227547156740274abbe15c2 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_51_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21b3d5e44d3c4a94685c9d2641b57a0053ed460cb576f5b103806178001b5d9 +size 106611704 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec0c5cb4e1077a48a5ac2f4d12ae2e909d5a6fb3 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_52_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb609864f92f12fccfccfa9e8a96c6ea5f936d1ee75b0fcb1928d7b5202f8824 +size 106612280 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d783d1d4c17cb8abc82f48f5e0aa1127f4e7ce7 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_53_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e23dffdf01917a8c87adb11b0a1bee160781d1c9e7e3aff701d4af9486c817 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf993c86f7aa9ad1ef09a289cfab3cbe2f8bbb7a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_54_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5999412be00028199147c6ebd23d7a94a03a3070853dad1d491ff9e56aecec10 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8d3ce9700895bfe97ef0fed6eca32a7d6f757c3 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_55_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba658932185657d613dbed20648bfdd5a5820fb45b3ef8c4f91ab3055cf6bfc +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4daa7924963e4952ca9bf6f851195855741da71 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_56_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1ed68ef24c211111cdd6a679c0e5f14a44ead0d04a888b4e63a00305b423013 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..93094f6b535436af8adc53878e72952814cdb0ce --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_57_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28c2f334bffe6d00bd39849cc6041193c8427b67852a6c3253ea0e0ad671c963 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..58ae2743895bd41874c2522660c5c438d47d4d06 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_58_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d94dbaceef509df3ed2f10560d972aab7cf2cf42f3c125e8b42e20e008972b4f +size 106612344 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..13af65c0112cdefd1ce77b9e82001b01bfc99eeb --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_59_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9257292ece2cc86c8ee44c8dd2e679e40e45e7e20b3df7db6d99ce2424ec489 +size 106611640 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7db67b26004076992df7f06cf72ffdc1f28b2c98 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5920d092a045f60b085ddd1cba8ab74612505938b0ed2071b52c19ba25b51930 +size 106611824 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b5e0e39ba42ca8183be858ae5b6de0ae3876992 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_60_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edd465ed3b4169325ec424cea255a1c524119995900f84ccae1d5d9c7ffdfb5d +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f9dd5df8f4ce3fd3a4f17e0a9fd00e2646e7dae --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_61_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da7a5217135b38b69693eb293faa51cd89587558467f44c879150ce90a3dd00 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bea0dab6150cae119689cd78bf0c8f3f5c940f07 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_62_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a8c04fa9beaeaf446913c94be62ecb35ef490925606286db79b3a3c1c242502 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb5dbc4d69157e4c02a80415a84e565a958e30f0 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_63_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9925fc1f29f985cce5ad11d83b6e91d2596a9d450c7687529dc164f6fed9d661 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_64_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_64_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1ea0b65ab211e1c8843455aac37ee781d224564 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_64_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae184eef457b9d8957fe02343315c6833afbac3ccdfcd2d4f4c3a910b711c785 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_65_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_65_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..582ae1c3a8917d95fcd13e26c8f2df0ca6049397 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_65_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce7c3f7645eb7aab55f8ddb637449ca8432b806aee0112f7a19de3b4e19e060 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_66_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_66_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..07272658a113d2baf678cd884db7ac404b484f33 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_66_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be41920f364c30feb5cee3feac90e8bf540e350c75105548ad96b12f13f0aba4 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_67_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_67_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..76bd5a6980ce39b110bde84026d13f8cba5b6e59 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_67_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26ffcdafe446e8490bec7eafbe3d9fcbf7457852f085011799a20a8917f83b8 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_68_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_68_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9702da623c59221f6a828bb45f365a7cbb48de65 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_68_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbe9f16a3a2e2425b29fe4e417500c272250116557345898e5b2b26965cc052 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_69_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_69_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..5df8de5e4b503100e75e7ee6c52aa313946baf05 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_69_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf90284913e2547dc5a01e7ac9e5ad1148eaa75d87246465a2376ef9641b31b +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8cabbae974f0ae3f02d388872397688ec16a41b0 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64014919fdf1364827ae6cb071237382a15542bc4694314c773dd8a586fa9e81 +size 106611440 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_70_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_70_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ef13fc5e4127419f0ed35c1ae317b57290965ce --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_70_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e8b3f91e1482b0865d6c309bd2c076b01e587958659c1ba52f91dfeb794049a +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_71_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_71_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf0e3a668435b92b16d27d75074a2ebe9576766d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_71_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c2fb03f0057c023bc40d92452d1cdcec66053302d96aa13272da282f919872 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_72_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_72_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a2cf25c8405dbbdcc2c41152fa2e563dba8e86f --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_72_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf4a210b63df5a74ebfd9781066220b9cb67b8b80f603b1b691882b62e3c054 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_73_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_73_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..177edb9c719c32fbfb5ac497901469df9aa528c7 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_73_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a10ddf46b5f9d661003719e3167b5417d2578243540f35b87ed6c1a9d2e1e8c +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_74_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_74_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..9381a7b59d6bacca6d4a4af5f1d3c49b36ffe681 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_74_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929107328869b6ac3ba1d995c3e93c3636ccfcb81ea71572871bf70dda1659ce +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_75_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_75_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..7da1fe19c1183d53494a4d3a20767700edc75b4b --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_75_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f6eb70e2ad438e0303f7f2308e69141d7141ebd79bf0e89cd4e712f48e31be9 +size 106611832 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_76_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_76_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9d41e5847a820abc4d4e3001e7ef3a4c16ef4fe --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_76_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8cbb5f526b296c2a70533f15b551dd0eca6d73f743b6f9da1cad2c30e627cc4 +size 106612088 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_77_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_77_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a9b4501c118fa5b2798a20140a6c146aa83ad75 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_77_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de5a22c38eb893124329eec04a224811ea44f6daa4f2df9f40554e870d09adc4 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_78_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_78_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b9f22694f00be3521275a040e448446dd8cd12d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_78_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac0e12e0ec0bdcb37c89a0d858f84e1236b9ecda7947497fabf4ca70df78eb8 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_79_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_79_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6c3d86766c03f60ddfc660f13157262c21a9ebf --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_79_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ac43b6c80d86ef260555e9129e35afb86e79a6f63b37677d510a0604931d723 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..a99f0b862165e7751caf86e2c48cd1ff9b82fb45 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d50c1af87bd6371e646b32825d066b70abab5c8d3d46dc8fd6b57189c7fa13d +size 106612208 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_80_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_80_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..76b8ae609d1db6f5f35f373c62f5f5b26f34108c --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_80_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:610f4e2f89c93873f96882e07bc51822204a47b616362f557cbcc3a2282e05ba +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_81_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_81_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a5841163ef581dcd4a2d0f570664b4ae7070baf --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_81_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa3c5bcffabff02e6011abd75ca1b71d74ba336b26d519d75348379ac5e40179 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_82_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_82_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d080628dd543a04157ac8613a393582ddfa5cd76 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_82_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f67c935f5536be5696824e5e203fbef15cf7ad937fa889a8b37d15878116aae +size 106612344 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_83_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_83_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fbc89d7dd518e5a178b8178a260c0ec23ca608a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_83_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35e0ad4d884b4bb6a3137cebdf373630ccadef8703a693fc33c9777cf694f390 +size 106611640 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_84_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_84_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4c250b5008201ee7755923f1359c7c733290142 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_84_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2207e0da3b5dfb3b83b1cb36eeebaa635321746b19da5a5db8b4f7e7d6701d +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_85_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_85_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..94c0953701f8d3d40c4c36a85616e890ea2481d3 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_85_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e463c1c2aabf4ddd1895ba20b1fd459ec2e5b866224c82c971d88c810868f6aa +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_86_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_86_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..43bf87970681482196be5d2d7b0d927ab6daca05 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_86_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afcca8688435d5e22e775e35c4557a36b30368c0fd9e2789dfdf7ce94d77f1ee +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_87_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_87_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..72d9924f2b2b31f9e3b8041bfbd08ee07b18b2af --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_87_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9d408877ddcd0dcfebb898ed3fde59b31b7119e3d14c6d15cb8f7ccec7d5f07 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_88_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_88_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f9a0064dcb5022b1f0158db9a9a2368679b42c8 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_88_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081ca10167249b284f00e368ff88b48b94006934c7aaf63d5bcdcf001a3a350c +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_89_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_89_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0311b10da58589f1c59874336f8824e7cf44ad1f --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_89_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeb188e8f3534b743734fa64e6c5e5a1f528a6255234c13fa1bf0c39351abb73 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f033e3f5926d0418d464feb62dc5f5d39975d361 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dc7ff4233400aaa8a724f8d437e0992f14c37c7c8a90b05aa9e3b9dab3917de +size 106611760 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_90_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_90_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a483901fa8a0791d064602c350279a412d0c9eb --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_90_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76bd4459404ed572333f7f48592685429d9f540eaaa3bf421f34a6004819da0e +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_91_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_91_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cd5f025d2f41058ac8691f878c352c7f1c561f1 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_91_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0221da66f3dce5a7392d86f46406c23f13e22e0b88e6fd3a151355d9aa979733 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_92_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_92_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb03b7b3e7f4c34f6f6b98ec251d6cdfbd363747 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_92_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce6853db19c4f2af1990976052de982d177195f8314f8919458c115ea3f5276 +size 106611576 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_93_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_93_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..64c1ebd385fce9fe16da21c5dc4d721a56ed91a3 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_93_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7297bd1a8099485619ac17c381cbd2a94ced28b3fecf5cad4b03459efa02f3fd +size 106612088 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_94_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_94_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f10f3711bc5fa24fb21a18382f1ad94f13ec6528 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_94_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb3cbbccaed42d00007c0a70ef578584e2285d8757b8d5a4d7e1c6ffefce7265 +size 106611896 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_95_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_95_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..78c8d0672b6b044a38c82127de0899a5ffc98297 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_95_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15989eef06a2c7cc08459164d0ba6687e19431f65360bba51df33eb97b3b60ef +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_96_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_96_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..93d8df5c7240a9e5b5fc310292e06d04b880fa2f --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_96_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9785432b438d260b5c16e99b693f94eda3e26085a0931c21b3b2c82ac58abc2 +size 106612216 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_97_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_97_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..be77b8aa60ea77cf18ba7f2cd4237b4b6fdf8ed5 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_97_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bfa3cc414731c9595d02b26112956ed6375f0f0fa4c2cf02bc7f059f79c84f2 +size 106611768 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_98_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_98_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..f731aef89842cffcadff53afae3105b7846233ec --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_98_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6ab508ae9a88cde3fbab4f835cb1e1ee137e35ce93da18f1a715987762e2a4 +size 106611448 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_99_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_99_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..6799a40124807120f206084e0809a53f7353ee61 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_99_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c8431d5333fe6d5cd846f8a265e82e90fffdc1f94fd1595598ad0a2bb9f642 +size 106612024 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..d033c475c9607419f0a8eb3652d58a3bd1429218 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c8a68433f5f8b833fffaf2a00b18c62ef440562f9ba17b3d312dc0012c7a5b0 +size 106611440 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/mp_rank_00_model_states.pt b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/mp_rank_00_model_states.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e7fb018fd994b4166a438afb48f01ddd03603fd --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/pytorch_model/mp_rank_00_model_states.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8244da89b94b56950aa868a76ca1b87743acf8cbb72a1d96774cfd01a8d8da1 +size 2274504556 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_0.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_0.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f46b718a8bf1d0123b4a51fa3d90bc954d2def0d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_0.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1759c698cb1739ea9bcb991375e1e9ed4b0de2bc43a7b65148a1b5b4e5f59a80 +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_1.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_1.pkl new file mode 100644 index 0000000000000000000000000000000000000000..786b9a82842838ebfca767d75dda8d6489d8c1ce --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_1.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35fe65f033d65ff4e3086d3500a05ddd29742a62a55eba9a26f57ec25d0603f1 +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_10.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_10.pkl new file mode 100644 index 0000000000000000000000000000000000000000..88b867db0c5d9dbf1d1a73b3ac00412540de8e45 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_10.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93fb48466829600a74bd22ba4ad1da48bdbffc0622a09caff8a0f434a3435ec7 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_100.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_100.pkl new file mode 100644 index 0000000000000000000000000000000000000000..61aaeae0a0ac555f60cd726f4088785ea0ff8b80 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_100.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82994ddeed0b4d97d2325e58590fd985f4fdadca1f55790a2631d304dee74803 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_101.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_101.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5f163f157377f015fd0b710c5acea1b7dbbfe625 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_101.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b94a9dfe7e3aaab356e5599e11adf313f85c1f8e74f7830c0598e932fe02e40 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_102.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_102.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e7d54c8513c83f8b03f76db4942d3d4cf2380b00 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_102.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:897322e14edc5a69275b9b25ea797503987cc842ff4fb8fee226b2e128177722 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_103.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_103.pkl new file mode 100644 index 0000000000000000000000000000000000000000..123e09ad787bd9c9720a9dd5fede1b29fab2e1d1 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_103.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a78aa6061202ca85ac98edcfbd11b1d401af0ca0c268662143ffd505e3a0816a +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_104.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_104.pkl new file mode 100644 index 0000000000000000000000000000000000000000..401e01412e77ebbab1d3afa609149b20ef8959ad --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_104.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c09b0b088c6fa4bcb69ede05fd67e40918707e84af03a288f6b4787b423897e9 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_105.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_105.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ddf5d09fb34eac0fe0cdf5e45250698f0310699e --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_105.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2eee673807c45ac64c1eab7a32d4275e8d7fa442fbda5cd310051e3c519d8b5 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_106.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_106.pkl new file mode 100644 index 0000000000000000000000000000000000000000..599e6a0b38ec68d80ce9a7a78bd4d8e58a6e3e0f --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_106.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c43364818fe3da7df11883bf0b1847411450c8aa048a599c8c42b32912ac2161 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_107.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_107.pkl new file mode 100644 index 0000000000000000000000000000000000000000..7235497e4849d4e3569d5b208be6e6fb90fef566 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_107.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bb71d0d580791f574d2ecee07acb65578b725ad6e0b7ab864e7754f70409c2b +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_108.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_108.pkl new file mode 100644 index 0000000000000000000000000000000000000000..573f0d1fc6f249c76f0c4298e1ba066608219519 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_108.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3e02eaa1d0d754207795916af26c6ee1dfa9f3559a0af97f4f95e1774b6074 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_109.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_109.pkl new file mode 100644 index 0000000000000000000000000000000000000000..073f16f4773c425aa4309c172debd510221d1414 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_109.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c7fbc23ae0ee4efeff42061d55d3d212332c2f9ceb9f4442ab497988a842b0 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_11.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_11.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ad855d73d318615e67e15c2d02c983fe8bfc0a74 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_11.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429f79360987a995feee8eed1abe8b6387653f048fe58b9373ec6944142c9184 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_110.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_110.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ca22d2a496ea81f954cd9a599662bb7e1d1cb38b --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_110.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df188475f62007a21b7bd71ba005a79193473b196fd16dd2d4c456fccd1779c8 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_111.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_111.pkl new file mode 100644 index 0000000000000000000000000000000000000000..118e7de1292634b4629134f416549749eb8a8c8e --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_111.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:849376ad4712bf234c74263cdbf6e373ff78037aad41042814f77d27fc495f56 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_112.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_112.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c6413b206274b00908e79c7a506e98c6dfcb4552 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_112.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e96c6e2e0946664cc8c64bb853662faf7a5d9424d9ce7eba04ce32146a95355d +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_113.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_113.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5a8a587a0519dd5d97bf0e14bd31cbe6b72f25b9 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_113.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c957b7b35d55bc670d1bf547d9bbc9d57365c302d11ea5d8c6ad04abac1ce101 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_114.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_114.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f7c864a72395f61128e68da3a0a5a166f11912f3 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_114.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764a83ffa2756aba08889b900ab3903903a1e34c4a1409bd9d9ce22f6cfcbc8c +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_115.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_115.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5346771925c084c0ef422f167728b86dee753f1c --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_115.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5234efeb7146d6250c7770f5725af57d6ceb07eed440779a8e5803a48c70ea82 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_116.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_116.pkl new file mode 100644 index 0000000000000000000000000000000000000000..17fb56c4229ca194565553294285be50218b975d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_116.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d331d90cc464cd8c7fb52f9f477acbc524288661d51dece1ea10117b0a64cc +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_117.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_117.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e585343f68309689d2b3b91659cba6f2a98b7b1d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_117.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e38e7d470526d8a57c4e3faccacc2ab768cccf0f75a60cfdd434214fb0d23e3 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_118.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_118.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2ebfbc1dd5e2c1cac6f240680e04328041275ca9 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_118.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5de295246a39c68cd04d0ade84ea56da10b870cc2e5ec6258561cd7205174ae1 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_119.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_119.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b6bbe3d17ffb3436e5c878a8ac69759d8159b545 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_119.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33d42c6fab0cd8b2bf242b75220284290ef0e20427c21076d748564f949ef6a3 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_12.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_12.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0925d0ec4aa83550991cb7f74450a1c8cfc48560 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_12.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481daf5aee5aa379287171107a24511cc0572aa239019cf8bc7b301c14bf1344 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_120.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_120.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a46ba8a67fcb2e0d46b542c55e5608b335fa0a79 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_120.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4dbf7fdb7929c95e123d3f9f2b8ba38d846b187a2635ca1aff1d40e30f6d96 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_121.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_121.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a4528926e754f45d42ee80007343ff5c35c547fd --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_121.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5038f03f1843e36dbec46c565d910893a3a130c2ffd317745c72c9664a523481 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_122.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_122.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3717e9b8a1dfe7eb74852123732daab5510b772e --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_122.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd49145039aee8ca27bd7eda595d69d08fa5117e5a9c32bf7c77fd2187c188f0 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_123.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_123.pkl new file mode 100644 index 0000000000000000000000000000000000000000..52af7106c2085634399791fb614d45738f0e808e --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_123.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28befb23b58eec950eecec2736d29d81211f9b7de908b166d107ce7f348e99d5 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_124.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_124.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e13beb8dbe2936673c967bf0d691bb2df0a5be86 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_124.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a28ae52bddfb0e1cdf2106abc491213f295550663e9fbd9e4d75c1bcfc290db +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_125.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_125.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8174374ec3c69bdbad345460665281c30f6a4bdf --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_125.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cefce5c593afd7080af9b7daeab68d500356875410f76ee8f8bbaabdd44a9197 +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_126.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_126.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4c9683ed40c344bc08ddb066e202b72d9479ee20 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_126.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5402cdcd0b7274eb16aba9ec8a80584ba8fc2ed59ba472c08b0055049188bbe +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_127.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_127.pkl new file mode 100644 index 0000000000000000000000000000000000000000..77fc7c02fbac800673a5cfeffd7ed830857e79d6 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_127.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31877d180c7eee0d53452981c39046a9b3fbc8a6ec57edd0e07c3ad76a69626d +size 16126 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_13.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_13.pkl new file mode 100644 index 0000000000000000000000000000000000000000..dee35ed80dbfa5cd5e3f59413d2b7d4339b53102 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_13.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a03a0b265e9c4f974a87e0783fc7a39f23da52a462c8243d42de2a6157a8a07e +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_14.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_14.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8e71bd0e9009f1feb73c53dad919596cc70e66ca --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_14.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a36e634f13c14d90c59a14c5f30500472218dcf108b8fc1b571937a6fd3b47f +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_15.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_15.pkl new file mode 100644 index 0000000000000000000000000000000000000000..48ee5ff8dc4b8a3b43d1b1aa72857e31cae979be --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_15.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1725d03c356fd746e153ae44b45011d3b5350967f55527deea240340723481 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_16.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_16.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4334006b114acb4a10abaae1c992407262a52333 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_16.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30fff7f01ae1842c42a4bf90e4705686a112d4258885be90c78653489755e24 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_17.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_17.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2dbadc26a60c3cc2458ea12140502bf5e6791d10 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_17.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2619919b87d3e2348af7a90a4a5231d1a789d5341178d1e5c51949ca44ceb387 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_18.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_18.pkl new file mode 100644 index 0000000000000000000000000000000000000000..872823e83fead1deb8734de170ba6e884698b470 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_18.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:132d52c686ebc6d03f500a7fd61e93e1290b8e0c35a47faea906ea0baf8b4975 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_19.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_19.pkl new file mode 100644 index 0000000000000000000000000000000000000000..faf9b245cca42f40456d69e57f71cfe32abc6dad --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_19.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a27ffc1015f851bb69841c654ff9749408c67b49d54742bb07b2df0e200755e +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_2.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_2.pkl new file mode 100644 index 0000000000000000000000000000000000000000..beb4678dedc5b7878adb5d148e3aba6964c74cd6 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_2.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60bc0b8ffc202a3c6eb27c28e10cd5cce958cac46d33c55082fce439a04d59b +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_20.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_20.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e08ecfb47d71e15bd85254ae45740a0dc5503305 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_20.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ab377af7e4dbbebaec99aff18fbcb6b3100e8a8880bea1b6331a9f9eb22dff +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_21.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_21.pkl new file mode 100644 index 0000000000000000000000000000000000000000..50a60920006d1b7d6eb88499484c476085b66ff9 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_21.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab11250f341907f2dca1d00038a8142bcde95a3eedfe3c47a4fe6ebc9ab7933 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_22.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_22.pkl new file mode 100644 index 0000000000000000000000000000000000000000..edbcd904cd41a1b45f92a4e74769be08779dbfa6 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_22.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038d862ca963d5e2265d745a1173ba958a17aa4e4899ba179541a677de239f88 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_23.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_23.pkl new file mode 100644 index 0000000000000000000000000000000000000000..51e0703580a241cd11f08f9ac2bc215b21669963 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_23.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc9e6c74dc477cd73b6e9e4e072e32462f834497fa47845a5f9e4d7b2eca616 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_24.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_24.pkl new file mode 100644 index 0000000000000000000000000000000000000000..273c8e15445ca7e6f330ee0f2a8cfa1565689a90 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_24.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb10ad58ae8a9c5981528260bb4c950fff5de66f245bc4c71a56a67003d9891 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_25.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_25.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1beaf65b25f35a81e4e4effa3552fe6cdf181361 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_25.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c8b737e3144ce77923c11d365111c8428bc7ee770c03fd87d9ba7b7cff815e +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_26.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_26.pkl new file mode 100644 index 0000000000000000000000000000000000000000..957a427dbe30e5d4a20c68bdbb403d12c53a7cc0 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_26.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09180d81647c825136aeda105ccd35365e036a98f98b49f33359129957c73544 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_27.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_27.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b69c29bdb34579ea8031774d1d9ef7fadc871a4e --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_27.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b992fb799bf230fcffad1fc7427d2d5a8f497235cd2aca21ed647279c58b4723 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_28.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_28.pkl new file mode 100644 index 0000000000000000000000000000000000000000..75f70588308d317fe48903e56bb8a6f81cc891d7 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_28.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ac1781b14edb9662270416e2f8d861367f088131f3f60200be7db7415f3d0a +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_29.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_29.pkl new file mode 100644 index 0000000000000000000000000000000000000000..97952114ce274dcce4fe6a1c5814b59fb0a4838a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_29.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3ed38e47b5f0c22de49979c2e4c5eb0842e3dd3a2e8ec04c8e8ed1138d58578 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_3.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_3.pkl new file mode 100644 index 0000000000000000000000000000000000000000..59f43b51944a49b8208c280a1dafcf044e9febbd --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_3.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6486ea4a1c0c84e213291280c574bc0e59707a13fea1e02d9f4e415fbfdc9016 +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_30.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_30.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9222ecb0f25697511f0cff54f2a78892a6521bd2 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_30.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a63e8d5b0eb227a074ab27f1e0249e0e67a2f6fda60449bfc6d5e964a0148d4 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_31.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_31.pkl new file mode 100644 index 0000000000000000000000000000000000000000..fcea274d0de0ca7a6906a40b38d4ff13c173361c --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_31.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23bf1698d25aa858897e559f5fad24cb8d79ee826d193a9f6035c0be67fbf056 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_32.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_32.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1b72ed92703a4c6e2be9cceb0bded2f9660530c4 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_32.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd39a7e8eda9cf83f86be7a3044f7ce476992c2ee6c7631f980f194f012dd0d +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_33.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_33.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3d7d50bcd2a0b469482753f0534bd02655b3ae9d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_33.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc4c368faf20b49e850fc6b317118eb6acfaaf876cdaa65211574cd073a4968a +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_34.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_34.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f6dbf2f2724af6d90cf2170d7182aa2e9f110cd5 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_34.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:866620eb8f64f77af01508749e58681ac66704642825aaf9317f6cc1e519d192 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_35.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_35.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5fe8bb868a007b85db05b1034478676d0ea9d505 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_35.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7edf84cabf9df305c0a13d8ad10139a0507673f2851b82e51c6c7cee2e8d9e81 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_36.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_36.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6785c1bbf791ecbaba30f716a94b12e63b022ef0 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_36.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9186746f75277b8f6cc786cb6dd5a53ec8e603995e847c8d0de7e627682cebf5 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_37.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_37.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f8a75dea5994631f460696b2258e711afcccba0a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_37.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bea9df3ee72c8603d5606f4a6feb9007e63832d9b05520a435fa854f4665b03 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_38.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_38.pkl new file mode 100644 index 0000000000000000000000000000000000000000..813eb3537257d863c77aab1a64e3c0a30386fe4c --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_38.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1f8e7489fb50d92aec8087b6a12b982db2e97eab3b264034dd5aa5322f92613 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_39.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_39.pkl new file mode 100644 index 0000000000000000000000000000000000000000..820c11ec4f1e2002ba4b4ec78636821f49e88468 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_39.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:163125e485e4b74b4da9248a02afd084f247a90ece6fd8574bdd9d490bf63be8 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_4.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_4.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6cd22f3a959aa99dc7ec015c568c7e0a5c3aff3f --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_4.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb87e015f148d3f7b5ef3c3d948d127d150aeafc7c10d3fe683ccd47930c8cb4 +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_40.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_40.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0ca822beefbb9f5f888fe94d163ec3bfba49dfe9 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_40.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d68ecf246d98342b2c02ce788960177829a5280185c454e2680e23b7f78bedc +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_41.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_41.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4dbe2ced6d1fb2b9bc9f42533bdc242b18c6598b --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_41.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f39c7b9b61245b4f42bcb6597fd64847abb7f17d268d72918fe9bfa16ecc4fa +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_42.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_42.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b99b974066a8a6f73b22f3a2b6578772356c5bb3 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_42.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ea958f43e3825cd40c8e7480c4435209e06bb5ddfe016fa22cdc90513bb2f83 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_43.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_43.pkl new file mode 100644 index 0000000000000000000000000000000000000000..5d6918d51fa1d30343547a065d1365ff0df597c3 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_43.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70c35908053f58dadc449ebacd18c9c144a132f8895b0834d9ff8783421da55a +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_44.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_44.pkl new file mode 100644 index 0000000000000000000000000000000000000000..72112fb1b18751bfd90c40087802d8c7252585c4 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_44.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8c3ed2feea2831a93a649f2ca2352bbbc7905dd7e011462d640bf40b287532 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_45.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_45.pkl new file mode 100644 index 0000000000000000000000000000000000000000..02567d4a0aa38cdc6a8c3aaa06d7cf158b09c2ec --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_45.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf4fec50ae95ec9812b5111c17d26d99567ff25da9f3576735084d25b7a2cd9 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_46.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_46.pkl new file mode 100644 index 0000000000000000000000000000000000000000..9afb691ac3623f8dcd4dc235aeba3f080db4e692 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_46.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3914a225871a22358ac50c1b7a690d0bae9b0016db4c17b753fd7762883a1b41 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_47.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_47.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c72ca5de923721e43c600bf156a72ac9f07586fd --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_47.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67f3b534defe9c332a8c1639e4195ad61193ef8c12da558df9a9941da7cfe91c +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_48.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_48.pkl new file mode 100644 index 0000000000000000000000000000000000000000..972d6638143f1fcba42092d6c3731806ac358a07 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_48.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e61b191eb404a106f311f6c1e4ca42ebae00e14d9f116a60c757d9da72a0d54 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_49.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_49.pkl new file mode 100644 index 0000000000000000000000000000000000000000..059340c4117ccf57bf9312878e7b61ee3a6e6d97 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_49.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed395b2c70f87fca84803b3564f99f5d0efdc1a071de7d8d67de553eabf14c5a +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_5.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_5.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3d8f4c51e14b1032dad0359008aea162375fddcb --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_5.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46b4bf363307b3e0bbbd2ffb8521045dd6fe4b146fb1da1b4e6c19a78ad46980 +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_50.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_50.pkl new file mode 100644 index 0000000000000000000000000000000000000000..14472ce67e918b0728db97c50ad0b7a3e68c8e63 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_50.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c982d9b585780ba5b014e25230a3c43202a9fc5e84b2719621641432ef143a2 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_51.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_51.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0aca3ffaa093452d44f358a1360e4f0d549a64f9 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_51.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b93005a46d7eb2c0f0c76a587ca98372e3d3da66dc0d957e4db5a2e5d8914c +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_52.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_52.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d9253087494f3ab4e8b48461051e75ca0980af10 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_52.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d26cab1254ee668f11251a5b88185047ffd8bc83e7b27268cd962592ec78a33d +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_53.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_53.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0a1675e3f364a9af66bc206286e9bcd04b0710c7 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_53.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca22c8949bc38a8aab4b7973317b876a5d2ebd87cf6cdf25b1cff2ccb444fbe1 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_54.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_54.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4b99b953d91068277577fd1b4221206c3254b7f3 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_54.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f1ad0b46d94ef201c095b154332b3e7993a5d0006e7724b745edc5e84b3d76 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_55.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_55.pkl new file mode 100644 index 0000000000000000000000000000000000000000..86b532df7a4a86b0d520efbab9668203edba802a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_55.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2bbacd9665d8397a6748d5e63643752484620a7785a1dcb35d95cec1d1e4199 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_56.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_56.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3637ac5bcd9bc478fd987977242c3ec138e1795a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_56.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c3e338a02c6d67640526c4a02893d2fdda6faf76c136ac24341ba9e04586976 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_57.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_57.pkl new file mode 100644 index 0000000000000000000000000000000000000000..476029ec27bd1537e925f7ff70da34562c452019 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_57.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18afc30d7f52ccaa5420c8173277ea792c60a3ba898198bb3e3a59c4f933fad +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_58.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_58.pkl new file mode 100644 index 0000000000000000000000000000000000000000..2b1bbd67a0fffa45abadf2c846b785110fbda452 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_58.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:837c6506b13b64d53c44993691e33a4bbd19db5b605e39e4edce701c5f3aac1f +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_59.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_59.pkl new file mode 100644 index 0000000000000000000000000000000000000000..282a29c84270c7997690b15d3e03fdc923ff3af7 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_59.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:895f66227d0e6021eacf0baa57e670650a8f6231cbc62b4494849e95375876d5 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_6.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_6.pkl new file mode 100644 index 0000000000000000000000000000000000000000..cd0b25a85014e90a79a229fea6d3cb1cdb925f47 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_6.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10497981f749e328d031beda4da6763b1b6ac9f2e74b6c9a7b4a6da7a33ee44b +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_60.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_60.pkl new file mode 100644 index 0000000000000000000000000000000000000000..486d2d4a0aa86b80e7353dd302dfa0346af4e019 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_60.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d65b48f31cb14887db50f028e965a6591e9d3628067532d7b53e92501a340ef2 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_61.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_61.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1416ae66161ece09ae8738ea7308944a641022ef --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_61.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f6ec96b6cb5d29a8a06a1f9fafca16494829f873c355addfcbaa557c2ca84b +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_62.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_62.pkl new file mode 100644 index 0000000000000000000000000000000000000000..14dc5bd7d42e5427037d8f60354658262dc91e51 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_62.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:528261dc53640d8a9024c41430e0b0759dfef5f652182906544e9103d5a41749 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_63.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_63.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3a1b77f97ca98a146119930fb1f487b390226527 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_63.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:759e3ddc20f7e0b40d2ac5ae52cf4c74a3fd1feb75607f40177ecc25602731f3 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_64.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_64.pkl new file mode 100644 index 0000000000000000000000000000000000000000..79f9ccaa7e84062c80ee375b10a094da2891f46a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_64.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01df35e5f34a65c3ecfe0de4237356746d8d8bd455476dbfe2496529932adf3f +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_65.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_65.pkl new file mode 100644 index 0000000000000000000000000000000000000000..79fa476ecb2e2dc46976af52d3cad2b30ebd373c --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_65.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22e9e9197cc5b87cabf637f9e563521216982846ffab9005e3bca829b22eaf2 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_66.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_66.pkl new file mode 100644 index 0000000000000000000000000000000000000000..384adfe90f5c53bbd5323dce093e2cf5fc7b9f69 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_66.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:990a13e9e8219488ba6713c7b1f844008607f52fa220625417d47eb5e279fa1e +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_67.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_67.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4c8ff2da6dcc26d71a4324d7c2e6820c60329dda --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_67.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef14f3205da5155c4d1546aee5f765adbab5366d76cb47fb6d0f7545e20e08f4 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_68.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_68.pkl new file mode 100644 index 0000000000000000000000000000000000000000..6b1f67efb400f9de84c337b3b405ca8faf4956a6 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_68.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e325120eb4647cc44e998bf84df18fdbeaaa929038985d09578821489e91d763 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_69.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_69.pkl new file mode 100644 index 0000000000000000000000000000000000000000..845f5a89e4f80da6417a02e594951feefd1b7f4d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_69.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a11128e37a338f1eb9b44debb605347681917bc2c5cba035a60fc488907cecb4 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_7.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_7.pkl new file mode 100644 index 0000000000000000000000000000000000000000..af21fbf561ddd7cb6837a9b0a675bdba6632f69f --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_7.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4675db9755500f90f2fbaa18927e91ea8fc8a423227c884ea9cdfb6a000ef03e +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_70.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_70.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f2f95a21ce7eb0d0ca27616feebb61951d2e7aaf --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_70.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76924c9a51ec44779047a3dc55733a85a9f7ad9d0208db3423a761437cf4c88a +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_71.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_71.pkl new file mode 100644 index 0000000000000000000000000000000000000000..83832c31541402844d0adf251eee5608896f7ba9 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_71.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3aafe1c0076b0bb19b417f5c726a8dba2c88e260f5b0f58237f085a9cd79efe +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_72.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_72.pkl new file mode 100644 index 0000000000000000000000000000000000000000..949a4b1dc478166ea0bcec07dedde761c1e1cb07 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_72.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ae5b20c9bb537c7a28ccaf1238498feea340e03b898f818fa270c24ec677d9c +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_73.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_73.pkl new file mode 100644 index 0000000000000000000000000000000000000000..f148726c338d75377644bd0be7a90b9ab7e74cfb --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_73.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0566820ad1ac101b75c29941ad1480e6974cd70132bfce544fe81adfa877e50 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_74.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_74.pkl new file mode 100644 index 0000000000000000000000000000000000000000..436cf2f56bb8220fa537f9fd377b86f257fe7a99 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_74.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6fe8b0e02592def879fc9e8bdda840126ccfefa3ecda5d777bc3026a6e68d2c +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_75.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_75.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8f3bc7707566ab01aa52b0b3900f96f15c5a8f5e --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_75.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4903e70d230b889c884ce29a8720aecd70a17068a04b2cd417e4c6c4780ce04f +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_76.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_76.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0f2d1eccef5cfab4c82dbbe3e3fa150a968c1ab7 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_76.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c3153a9857a5dc4f6d0e8923948d08e2c3d86f7f86a85ededeecdf779778ce7 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_77.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_77.pkl new file mode 100644 index 0000000000000000000000000000000000000000..68ca071ae898826fe9e4d494d81c8d3e68281154 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_77.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b86b90b29e80dbfae706aaecb0ebdf047e606dc46848860904bfe727997515f +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_78.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_78.pkl new file mode 100644 index 0000000000000000000000000000000000000000..050f50cd91f1551dcf2fc471f2cbc9c0a27b789c --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_78.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:891a859eba7f3042b2fce58bae0574fc8f43c9f1c26a3fcf72ae366a31804e22 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_79.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_79.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e6fcc0f22b5104e18db76ef57c105668de8a588b --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_79.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d903c6d5afefe5a23a7dbf174358d0e5bffcd0a3d57b91677bf15151955dba61 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_8.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_8.pkl new file mode 100644 index 0000000000000000000000000000000000000000..66e5d50b6e7420da0015db6a1096d7c6ce4aaeeb --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_8.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a3a5b666067f05704b5c1a2418f511d236d0e5a6fc9582331d7dd197fdea763 +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_80.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_80.pkl new file mode 100644 index 0000000000000000000000000000000000000000..39c225fe0fed453eefdcccbb311412f5a1ff37c4 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_80.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b304c3c13e373c6fcfb77bc85ba831a9fee7917df7ff0842f2e4ecf146802726 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_81.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_81.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e32e87aa1e082f2a639d703ca9a015f7e168aa41 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_81.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0507f7ed9ba285b98639ce5b892f5a42930219a0ad4ef87eba8bb85d6281fd68 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_82.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_82.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e527364a0b4833cb0fea484ac1557dd8cbf42485 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_82.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28159087e5dd7a7a3ec9a7397fb689f0db808c178bc60e891fc2bd019766f237 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_83.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_83.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0af062fff284dbe285ce6580ea077b7532d38427 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_83.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:976b53da6c7484071044db85a909198ecb18afebc35ffae5337cb49c2a10a859 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_84.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_84.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d46af99222d3b0b2d805a1d5cdab953ad967e78f --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_84.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7dff331e4c7dcbe9e9a79295c170cc92fa6c352fdd50a5e7194fb38c77c9b44 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_85.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_85.pkl new file mode 100644 index 0000000000000000000000000000000000000000..d52170861caa227625b0b9249a4e95ace3211316 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_85.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b74811de499dacd4ede98d10708579d9cba1ec69a00aaaac3b6f6f2d3755d307 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_86.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_86.pkl new file mode 100644 index 0000000000000000000000000000000000000000..beb39f34daf397ea83c419529ee56d75db0a558a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_86.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9184b330b42050e1d6738de5f2ddcdc8c14b579dae9b2156424785e9c2d39ac5 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_87.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_87.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0912bcc5c6ca56b4d54642e1ed690f5435932808 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_87.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a96340d4fa6fe35e7e08d8caca4df1ba9d5cba9c4a9bfa55f2a7c26fdec450 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_88.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_88.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4d1b3575d910b06b590cfea91fd1f9f68bf10fb6 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_88.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a8105d4780a60544574088291c870527535671dee0c79366c6da98dea653f3c +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_89.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_89.pkl new file mode 100644 index 0000000000000000000000000000000000000000..90af98347d50ccbb7bbc8dd359ee11d2389ee82e --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_89.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff664c4b1094edf91dacbf6bb391a84dd3feb0672cbcc1497945ece7539ba70d +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_9.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_9.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c8730f37b4dc0d4b20848c96e4af31f23530bc7c --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_9.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2e46326de7546caa4eec2756f126676d93fc0ec97a7698865409c1321800d9b +size 16100 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_90.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_90.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3181ea75dfe7c4c9744871b61ed0711a7bade850 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_90.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c7ced48df66ae4a40fe3ece5a63306e09d0c9fb0266a912e3d373717d58697 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_91.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_91.pkl new file mode 100644 index 0000000000000000000000000000000000000000..dbf24c46703ed50f292012b55b23a40c1a37924a --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_91.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06aa8e95a6a3aaa7f50b3e7163dc64314e7c4ee7f60eaa56bfe7486d1fbc18f2 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_92.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_92.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4d0967e0efdaa46aa15c210d5bc182210837f234 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_92.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:017801189fea0b11073b557753e7a85a406015ca8df77802cb3e463691702992 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_93.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_93.pkl new file mode 100644 index 0000000000000000000000000000000000000000..a239ee46d8ef78b9c082d00bf336a0d080dba058 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_93.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5e0fece481e843700d20aee01db52a97dce15cfb2898a5a771fa6f96436524 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_94.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_94.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8ec8fcd8708587d37a7ab87288d0713b823160cf --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_94.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670d0f17fa36317e0099863a0f3fd3aae30ba0c9af8808031ecad0ca154cfac8 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_95.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_95.pkl new file mode 100644 index 0000000000000000000000000000000000000000..3357c496ec65d0f22a484215344bfc93672da98b --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_95.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1126df54c0bec53398cd60a70e4b4dc2582255cf7b3422af48bed9dc2378432f +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_96.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_96.pkl new file mode 100644 index 0000000000000000000000000000000000000000..4098879ed59d9e9efeee814933c1a77942e87126 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_96.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71fab1f04bc3bc7ce246527d357e719c84c69f97435ef7cc52959c69370fb93 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_97.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_97.pkl new file mode 100644 index 0000000000000000000000000000000000000000..df78bc8d84103ccea2074450ac11e4aadc49e56d --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_97.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:553fedd99bf4ed7c7f74593c6f4f46704d0a74a3e79bf1a00882fd595e7ecdf6 +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_98.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_98.pkl new file mode 100644 index 0000000000000000000000000000000000000000..ba58c3d3441277aab90b146fda1cb9144c6b9dbb --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_98.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00185299bb1e015aae06f3b67dd2cef4cabbe839aee47a9cb8287acf6824b37e +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_99.pkl b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_99.pkl new file mode 100644 index 0000000000000000000000000000000000000000..930092b7ac8fd3da7f7a1269f125883716d98e47 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/random_states_99.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ab19faa5eac910b8056e326f3ecd4ab8b28208aba61bcea8262df61ce47cbe +size 16113 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/scheduler.bin b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/scheduler.bin new file mode 100644 index 0000000000000000000000000000000000000000..6535ba7bbdb44997c97fcf2eaa9c973636162ceb --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/scheduler.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d00893bbaf20d9c3d18bd13d0cd363f31cdfe9580c4cd2701c6cc052093ba4ef +size 1000 diff --git a/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/zero_to_fp32.py b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/zero_to_fp32.py new file mode 100644 index 0000000000000000000000000000000000000000..24cc342e78d1a006c782b3a4cd68d9ce786d8fd8 --- /dev/null +++ b/1B_10K_bs1024_ff15_ln_warmup_ds_v2/checkpoint-370000/zero_to_fp32.py @@ -0,0 +1,604 @@ +#!/usr/bin/env python + +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 + +# DeepSpeed Team + +# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets +# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in +# the future. Once extracted, the weights don't require DeepSpeed and can be used in any +# application. +# +# example: python zero_to_fp32.py . pytorch_model.bin + +import argparse +import torch +import glob +import math +import os +import re +from collections import OrderedDict +from dataclasses import dataclass + +# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with +# DeepSpeed data structures it has to be available in the current python environment. +from deepspeed.utils import logger +from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS, + FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES, + FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS) + + +@dataclass +class zero_model_state: + buffers: dict() + param_shapes: dict() + shared_params: list + ds_version: int + frozen_param_shapes: dict() + frozen_param_fragments: dict() + + +debug = 0 + +# load to cpu +device = torch.device('cpu') + + +def atoi(text): + return int(text) if text.isdigit() else text + + +def natural_keys(text): + ''' + alist.sort(key=natural_keys) sorts in human order + http://nedbatchelder.com/blog/200712/human_sorting.html + (See Toothy's implementation in the comments) + ''' + return [atoi(c) for c in re.split(r'(\d+)', text)] + + +def get_model_state_file(checkpoint_dir, zero_stage): + if not os.path.isdir(checkpoint_dir): + raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist") + + # there should be only one file + if zero_stage <= 2: + file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt") + elif zero_stage == 3: + file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt") + + if not os.path.exists(file): + raise FileNotFoundError(f"can't find model states file at '{file}'") + + return file + + +def get_checkpoint_files(checkpoint_dir, glob_pattern): + # XXX: need to test that this simple glob rule works for multi-node setup too + ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys) + + if len(ckpt_files) == 0: + raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'") + + return ckpt_files + + +def get_optim_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt") + + +def get_model_state_files(checkpoint_dir): + return get_checkpoint_files(checkpoint_dir, "*_model_states.pt") + + +def parse_model_states(files): + zero_model_states = [] + for file in files: + state_dict = torch.load(file, map_location=device) + + if BUFFER_NAMES not in state_dict: + raise ValueError(f"{file} is not a model state checkpoint") + buffer_names = state_dict[BUFFER_NAMES] + if debug: + print("Found buffers:", buffer_names) + + # recover just the buffers while restoring them to fp32 if they were saved in fp16 + buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names} + param_shapes = state_dict[PARAM_SHAPES] + + # collect parameters that are included in param_shapes + param_names = [] + for s in param_shapes: + for name in s.keys(): + param_names.append(name) + + # update with frozen parameters + frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None) + if frozen_param_shapes is not None: + if debug: + print(f"Found frozen_param_shapes: {frozen_param_shapes}") + param_names += list(frozen_param_shapes.keys()) + + # handle shared params + shared_params = [[k, v] for k, v in state_dict["shared_params"].items()] + + ds_version = state_dict.get(DS_VERSION, None) + + frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None) + + z_model_state = zero_model_state(buffers=buffers, + param_shapes=param_shapes, + shared_params=shared_params, + ds_version=ds_version, + frozen_param_shapes=frozen_param_shapes, + frozen_param_fragments=frozen_param_fragments) + zero_model_states.append(z_model_state) + + return zero_model_states + + +def parse_optim_states(files, ds_checkpoint_dir): + + total_files = len(files) + state_dicts = [] + for f in files: + state_dict = torch.load(f, map_location=device) + # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights + # and also handle the case where it was already removed by another helper script + state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None) + state_dicts.append(state_dict) + + if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]: + raise ValueError(f"{files[0]} is not a zero checkpoint") + zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE] + world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT] + + # For ZeRO-2 each param group can have different partition_count as data parallelism for expert + # parameters can be different from data parallelism for non-expert parameters. So we can just + # use the max of the partition_count to get the dp world_size. + + if type(world_size) is list: + world_size = max(world_size) + + if world_size != total_files: + raise ValueError( + f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. " + "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes." + ) + + # the groups are named differently in each stage + if zero_stage <= 2: + fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS + elif zero_stage == 3: + fp32_groups_key = FP32_FLAT_GROUPS + else: + raise ValueError(f"unknown zero stage {zero_stage}") + + if zero_stage <= 2: + fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))] + elif zero_stage == 3: + # if there is more than one param group, there will be multiple flattened tensors - one + # flattened tensor per group - for simplicity merge them into a single tensor + # + # XXX: could make the script more memory efficient for when there are multiple groups - it + # will require matching the sub-lists of param_shapes for each param group flattened tensor + + fp32_flat_groups = [ + torch.cat(state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key], 0) for i in range(len(state_dicts)) + ] + + return zero_stage, world_size, fp32_flat_groups + + +def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters): + """ + Returns fp32 state_dict reconstructed from ds checkpoint + + Args: + - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are) + + """ + print(f"Processing zero checkpoint '{ds_checkpoint_dir}'") + + optim_files = get_optim_files(ds_checkpoint_dir) + zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir) + print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}") + + model_files = get_model_state_files(ds_checkpoint_dir) + + zero_model_states = parse_model_states(model_files) + print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}') + + if zero_stage <= 2: + return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + elif zero_stage == 3: + return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters) + + +def _zero2_merge_frozen_params(state_dict, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + frozen_param_fragments = zero_model_states[0].frozen_param_fragments + + if debug: + num_elem = sum(s.numel() for s in frozen_param_shapes.values()) + print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in frozen_param_fragments.values()]) + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + state_dict[name] = frozen_param_fragments[name] + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _has_callable(obj, fn): + attr = getattr(obj, fn, None) + return callable(attr) + + +def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + + # Reconstruction protocol: + # + # XXX: document this + + if debug: + for i in range(world_size): + for j in range(len(fp32_flat_groups[0])): + print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}") + + # XXX: memory usage doubles here (zero2) + num_param_groups = len(fp32_flat_groups[0]) + merged_single_partition_of_fp32_groups = [] + for i in range(num_param_groups): + merged_partitions = [sd[i] for sd in fp32_flat_groups] + full_single_fp32_vector = torch.cat(merged_partitions, 0) + merged_single_partition_of_fp32_groups.append(full_single_fp32_vector) + avail_numel = sum( + [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups]) + + if debug: + wanted_params = sum([len(shapes) for shapes in param_shapes]) + wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes]) + # not asserting if there is a mismatch due to possible padding + print(f"Have {avail_numel} numels to process.") + print(f"Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + total_numel = 0 + total_params = 0 + for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups): + offset = 0 + avail_numel = full_single_fp32_vector.numel() + for name, shape in shapes.items(): + + unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape) + total_numel += unpartitioned_numel + total_params += 1 + + if debug: + print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ") + state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape) + offset += unpartitioned_numel + + # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and + # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex + # paddings performed in the code it's almost impossible to predict the exact numbers w/o the + # live optimizer object, so we are checking that the numbers are within the right range + align_to = 2 * world_size + + def zero2_align(x): + return align_to * math.ceil(x / align_to) + + if debug: + print(f"original offset={offset}, avail_numel={avail_numel}") + + offset = zero2_align(offset) + avail_numel = zero2_align(avail_numel) + + if debug: + print(f"aligned offset={offset}, avail_numel={avail_numel}") + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero2_merge_frozen_params(state_dict, zero_model_states) + + _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def zero3_partitioned_param_info(unpartitioned_numel, world_size): + remainder = unpartitioned_numel % world_size + padding_numel = (world_size - remainder) if remainder else 0 + partitioned_numel = math.ceil(unpartitioned_numel / world_size) + return partitioned_numel, padding_numel + + +def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states): + if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0: + return + + if debug: + for i in range(world_size): + num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values()) + print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}') + + frozen_param_shapes = zero_model_states[0].frozen_param_shapes + wanted_params = len(frozen_param_shapes) + wanted_numel = sum(s.numel() for s in frozen_param_shapes.values()) + avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size + print(f'Frozen params: Have {avail_numel} numels to process.') + print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params') + + total_params = 0 + total_numel = 0 + for name, shape in zero_model_states[0].frozen_param_shapes.items(): + total_params += 1 + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + + param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states) + state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape) + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements") + + +def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states): + param_shapes = zero_model_states[0].param_shapes + avail_numel = fp32_flat_groups[0].numel() * world_size + # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each + # param, re-consolidating each param, while dealing with padding if any + + # merge list of dicts, preserving order + param_shapes = {k: v for d in param_shapes for k, v in d.items()} + + if debug: + for i in range(world_size): + print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}") + + wanted_params = len(param_shapes) + wanted_numel = sum(shape.numel() for shape in param_shapes.values()) + # not asserting if there is a mismatch due to possible padding + avail_numel = fp32_flat_groups[0].numel() * world_size + print(f"Trainable params: Have {avail_numel} numels to process.") + print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.") + + # params + # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support + # out-of-core computing solution + offset = 0 + total_numel = 0 + total_params = 0 + for name, shape in param_shapes.items(): + + unpartitioned_numel = shape.numel() + total_numel += unpartitioned_numel + total_params += 1 + + partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size) + + if debug: + print( + f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}" + ) + + # XXX: memory usage doubles here + state_dict[name] = torch.cat( + tuple(fp32_flat_groups[i].narrow(0, offset, partitioned_numel) for i in range(world_size)), + 0).narrow(0, 0, unpartitioned_numel).view(shape) + offset += partitioned_numel + + offset *= world_size + + # Sanity check + if offset != avail_numel: + raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong") + + print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements") + + +def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states, + exclude_frozen_parameters): + state_dict = OrderedDict() + + # buffers + buffers = zero_model_states[0].buffers + state_dict.update(buffers) + if debug: + print(f"added {len(buffers)} buffers") + + if not exclude_frozen_parameters: + _zero3_merge_frozen_params(state_dict, world_size, zero_model_states) + + _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states) + + # recover shared parameters + for pair in zero_model_states[0].shared_params: + if pair[1] in state_dict: + state_dict[pair[0]] = state_dict[pair[1]] + + return state_dict + + +def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None, exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with + ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example + via a model hub. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + + Returns: + - pytorch ``state_dict`` + + Note: this approach may not work if your application doesn't have sufficient free CPU memory and + you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with + the checkpoint. + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint + # do the training and checkpoint saving + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu + model = model.cpu() # move to cpu + model.load_state_dict(state_dict) + # submit to model hub or save the model to share with others + + In this example the ``model`` will no longer be usable in the deepspeed context of the same + application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead. + + """ + if tag is None: + latest_path = os.path.join(checkpoint_dir, 'latest') + if os.path.isfile(latest_path): + with open(latest_path, 'r') as fd: + tag = fd.read().strip() + else: + raise ValueError(f"Unable to find 'latest' file at {latest_path}") + + ds_checkpoint_dir = os.path.join(checkpoint_dir, tag) + + if not os.path.isdir(ds_checkpoint_dir): + raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist") + + return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters) + + +def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_file, tag=None, exclude_frozen_parameters=False): + """ + Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be + loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed. + + Args: + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``output_file``: path to the pytorch fp32 state_dict output file (e.g. path/pytorch_model.bin) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + - ``exclude_frozen_parameters``: exclude frozen parameters + """ + + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag, exclude_frozen_parameters) + print(f"Saving fp32 state dict to {output_file}") + torch.save(state_dict, output_file) + + +def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None): + """ + 1. Put the provided model to cpu + 2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` + 3. Load it into the provided model + + Args: + - ``model``: the model object to update + - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``) + - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14`` + + Returns: + - ``model`: modified model + + Make sure you have plenty of CPU memory available before you call this function. If you don't + have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it + conveniently placed for you in the checkpoint folder. + + A typical usage might be :: + + from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint + model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir) + # submit to model hub or save the model to share with others + + Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context + of the same application. i.e. you will need to re-initialize the deepspeed engine, since + ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it. + + """ + logger.info(f"Extracting fp32 weights") + state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag) + + logger.info(f"Overwriting model with fp32 weights") + model = model.cpu() + model.load_state_dict(state_dict, strict=False) + + return model + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser() + parser.add_argument("checkpoint_dir", + type=str, + help="path to the desired checkpoint folder, e.g., path/checkpoint-12") + parser.add_argument( + "output_file", + type=str, + help="path to the pytorch fp32 state_dict output file (e.g. path/checkpoint-12/pytorch_model.bin)") + parser.add_argument("-t", + "--tag", + type=str, + default=None, + help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1") + parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters") + parser.add_argument("-d", "--debug", action='store_true', help="enable debug") + args = parser.parse_args() + + debug = args.debug + + convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir, + args.output_file, + tag=args.tag, + exclude_frozen_parameters=args.exclude_frozen_parameters)