diff --git a/README.md b/README.md index f413214e99361376802b9870c8f593b751636fd6..f8c898b6cbc242ac24a0189b8cb2b1e067f1786b 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ model-index: type: Humanoid-v4 metrics: - type: mean_reward - value: 645.91 +/- 94.67 + value: 685.21 +/- 164.35 name: mean_reward verified: false --- diff --git a/events.out.tfevents.1705702819.3090-172.2624734.0 b/events.out.tfevents.1705702819.3090-172.2624734.0 deleted file mode 100644 index 74cd9c6776b9c3e2939c2eea3a3748f2ecac2207..0000000000000000000000000000000000000000 --- a/events.out.tfevents.1705702819.3090-172.2624734.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5699cc286ae2c3921a70c48cf78128bcdc7625dfb2cc54eb1e00f0913cdb1260 -size 1610211 diff --git a/events.out.tfevents.1705733782.4090-171.2664005.0 b/events.out.tfevents.1705733782.4090-171.2664005.0 new file mode 100644 index 0000000000000000000000000000000000000000..1f85f0208d9a86a846680ded5cadd15b189f014c --- /dev/null +++ b/events.out.tfevents.1705733782.4090-171.2664005.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05dd45be84f4cbe9d207759d20c5518a07a60b44ed7fde367869956beeff1869 +size 1553803 diff --git a/ppo_fix_continuous_action-10000.cleanrl_model b/ppo_fix_continuous_action-10000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1cb48d6b97acaabd1190cdb093d653cedea85d7e Binary files /dev/null and b/ppo_fix_continuous_action-10000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-100000.cleanrl_model b/ppo_fix_continuous_action-100000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e0d3020cecdef8749d189e1d356ec295057b43b9 Binary files /dev/null and b/ppo_fix_continuous_action-100000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-105000.cleanrl_model b/ppo_fix_continuous_action-105000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..dfff3f1ea61995b84a794eb3ab8aff0bfb62223c Binary files /dev/null and b/ppo_fix_continuous_action-105000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-110000.cleanrl_model b/ppo_fix_continuous_action-110000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0c51ffa3e2362b7304a82f3c9438511f429789b0 Binary files /dev/null and b/ppo_fix_continuous_action-110000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-115000.cleanrl_model b/ppo_fix_continuous_action-115000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1617341b87c0388919921bfd6b2581c81b6963ba Binary files /dev/null and b/ppo_fix_continuous_action-115000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-120000.cleanrl_model b/ppo_fix_continuous_action-120000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cce310edb303f0d286a0d2568be60b0dce9b9bf5 Binary files /dev/null and b/ppo_fix_continuous_action-120000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-125000.cleanrl_model b/ppo_fix_continuous_action-125000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..88729c161311d53db03dde288572dcbf8b6781d1 Binary files /dev/null and b/ppo_fix_continuous_action-125000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-130000.cleanrl_model b/ppo_fix_continuous_action-130000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a02a6e76d24c98f3530768a7d2342074ee3b74fc Binary files /dev/null and b/ppo_fix_continuous_action-130000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-135000.cleanrl_model b/ppo_fix_continuous_action-135000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e6fff8acbdc0437f5f5bf1099e8a295a92b14c95 Binary files /dev/null and b/ppo_fix_continuous_action-135000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-140000.cleanrl_model b/ppo_fix_continuous_action-140000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..13dc68165666e98f140313e72c21ceab129169b8 Binary files /dev/null and b/ppo_fix_continuous_action-140000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-145000.cleanrl_model b/ppo_fix_continuous_action-145000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c8fc2b5c4b7c1f76e268b90ccd017e852ee2abb7 Binary files /dev/null and b/ppo_fix_continuous_action-145000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-15000.cleanrl_model b/ppo_fix_continuous_action-15000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d7f5297c48772a4df47f672e8a36d7bc9a39f53a Binary files /dev/null and b/ppo_fix_continuous_action-15000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-150000.cleanrl_model b/ppo_fix_continuous_action-150000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4e58e78deda924be40445817143ea4a9a3b66dda Binary files /dev/null and b/ppo_fix_continuous_action-150000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-155000.cleanrl_model b/ppo_fix_continuous_action-155000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2834b2a2222b45195b2e4da51452f5b6f3cb1c6d Binary files /dev/null and b/ppo_fix_continuous_action-155000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-160000.cleanrl_model b/ppo_fix_continuous_action-160000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..edd40dd512c3039765ec3671ee3c4b27d2771b89 Binary files /dev/null and b/ppo_fix_continuous_action-160000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-165000.cleanrl_model b/ppo_fix_continuous_action-165000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..804dad48fe555943f8a91b60df300ff89f4fe6cd Binary files /dev/null and b/ppo_fix_continuous_action-165000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-170000.cleanrl_model b/ppo_fix_continuous_action-170000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ecd241606a3e281ec2276d63e12f708c2bd59958 Binary files /dev/null and b/ppo_fix_continuous_action-170000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-175000.cleanrl_model b/ppo_fix_continuous_action-175000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d8baaedb4e71cc925e4e8ef371f125d2b9deb46e Binary files /dev/null and b/ppo_fix_continuous_action-175000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-180000.cleanrl_model b/ppo_fix_continuous_action-180000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b860f80304435b02d39c05c1cf83b8bf9b0ef774 Binary files /dev/null and b/ppo_fix_continuous_action-180000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-185000.cleanrl_model b/ppo_fix_continuous_action-185000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..aedd1449e4fde2fc5cda31770c5a819ecb4c4281 Binary files /dev/null and b/ppo_fix_continuous_action-185000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-190000.cleanrl_model b/ppo_fix_continuous_action-190000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4aaf1b66a1af9513cd9e9ea5442637319df19041 Binary files /dev/null and b/ppo_fix_continuous_action-190000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-195000.cleanrl_model b/ppo_fix_continuous_action-195000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..03e05f0a6e84bae6869444cd060a7b2fa727ba22 Binary files /dev/null and b/ppo_fix_continuous_action-195000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-20000.cleanrl_model b/ppo_fix_continuous_action-20000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..85a44820501f1ff4fd1ddca5d7b23411a5b12c0f Binary files /dev/null and b/ppo_fix_continuous_action-20000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-200000.cleanrl_model b/ppo_fix_continuous_action-200000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c84ae298a6936f8ed5695627b04bdf2f8c1b3566 Binary files /dev/null and b/ppo_fix_continuous_action-200000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-205000.cleanrl_model b/ppo_fix_continuous_action-205000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6e5c86c617f33dc453064884a0094c5c0b772bc7 Binary files /dev/null and b/ppo_fix_continuous_action-205000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-210000.cleanrl_model b/ppo_fix_continuous_action-210000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9c63d1d77d501d5b5d2b40aa3ee59b58052f1ea6 Binary files /dev/null and b/ppo_fix_continuous_action-210000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-215000.cleanrl_model b/ppo_fix_continuous_action-215000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..452643a502d344a42cc874f2b122ae37ba43bb44 Binary files /dev/null and b/ppo_fix_continuous_action-215000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-220000.cleanrl_model b/ppo_fix_continuous_action-220000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0bca96b6f40ebcb62b60d3471cefb8fe8760620a Binary files /dev/null and b/ppo_fix_continuous_action-220000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-225000.cleanrl_model b/ppo_fix_continuous_action-225000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cb58e3078941f12a2153b917d4aa530abb94ef7b Binary files /dev/null and b/ppo_fix_continuous_action-225000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-230000.cleanrl_model b/ppo_fix_continuous_action-230000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..81f2101bded58386522ad14e038aae35d11ecb3d Binary files /dev/null and b/ppo_fix_continuous_action-230000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-235000.cleanrl_model b/ppo_fix_continuous_action-235000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ad7a735658c3489054ba0af0386c030c93281d8b Binary files /dev/null and b/ppo_fix_continuous_action-235000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-240000.cleanrl_model b/ppo_fix_continuous_action-240000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..81eeaac58be88fdf227bc121b0a6de790366f075 Binary files /dev/null and b/ppo_fix_continuous_action-240000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-245000.cleanrl_model b/ppo_fix_continuous_action-245000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..70da2a6f9b8a430c8050b8ce79a2affd7cee33b5 Binary files /dev/null and b/ppo_fix_continuous_action-245000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-25000.cleanrl_model b/ppo_fix_continuous_action-25000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e6c983e63e9efa1405d14e9d8624938412fa278f Binary files /dev/null and b/ppo_fix_continuous_action-25000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-250000.cleanrl_model b/ppo_fix_continuous_action-250000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c06800b3d4fe810e0cac55d64d8f291eb9b9551a Binary files /dev/null and b/ppo_fix_continuous_action-250000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-255000.cleanrl_model b/ppo_fix_continuous_action-255000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..936fcd74212ae8115feb9e465824f4680d299575 Binary files /dev/null and b/ppo_fix_continuous_action-255000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-260000.cleanrl_model b/ppo_fix_continuous_action-260000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..29fe5fe900e137d31b7cd541455fca7bd1867acb Binary files /dev/null and b/ppo_fix_continuous_action-260000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-265000.cleanrl_model b/ppo_fix_continuous_action-265000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..40c580c4e0105e8f5ace0494df64abd6c88130de Binary files /dev/null and b/ppo_fix_continuous_action-265000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-270000.cleanrl_model b/ppo_fix_continuous_action-270000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..12e87c1e6a2d6e18f8ca133c32197f14b050c368 Binary files /dev/null and b/ppo_fix_continuous_action-270000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-275000.cleanrl_model b/ppo_fix_continuous_action-275000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..15766e44f56fa2e54c5ea1d99e9dc7cc8d067723 Binary files /dev/null and b/ppo_fix_continuous_action-275000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-280000.cleanrl_model b/ppo_fix_continuous_action-280000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..be8fc82e7f9b93cc7737daaf3bf82139afd01eb6 Binary files /dev/null and b/ppo_fix_continuous_action-280000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-285000.cleanrl_model b/ppo_fix_continuous_action-285000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ecfe6a1086b3cca76c1b48b5c218f3bcadb1a76a Binary files /dev/null and b/ppo_fix_continuous_action-285000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-290000.cleanrl_model b/ppo_fix_continuous_action-290000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c4b9e902cba96f3be3af1fc94f0258ab6865acdf Binary files /dev/null and b/ppo_fix_continuous_action-290000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-295000.cleanrl_model b/ppo_fix_continuous_action-295000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4f5c539f90a61f63f845e66876a42a67e7e7db31 Binary files /dev/null and b/ppo_fix_continuous_action-295000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-30000.cleanrl_model b/ppo_fix_continuous_action-30000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..dbfcc9e53bb2a5f456e0d714d24b55154b9cc92c Binary files /dev/null and b/ppo_fix_continuous_action-30000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-300000.cleanrl_model b/ppo_fix_continuous_action-300000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cc65977b497b4a18e1ff2ecf5094d6caf9c90ee4 Binary files /dev/null and b/ppo_fix_continuous_action-300000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-305000.cleanrl_model b/ppo_fix_continuous_action-305000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d9e477d1bdeb46dae100ee03e981e6d82007e396 Binary files /dev/null and b/ppo_fix_continuous_action-305000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-310000.cleanrl_model b/ppo_fix_continuous_action-310000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..58ff1d7c3cb7732229e6f82a242701aba1d20ec6 Binary files /dev/null and b/ppo_fix_continuous_action-310000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-315000.cleanrl_model b/ppo_fix_continuous_action-315000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2ad8e253157d05c5de6e7a752a2763c0846d93b5 Binary files /dev/null and b/ppo_fix_continuous_action-315000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-320000.cleanrl_model b/ppo_fix_continuous_action-320000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f1c8bdc2df845de86a4e1723c34f138b07194562 Binary files /dev/null and b/ppo_fix_continuous_action-320000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-325000.cleanrl_model b/ppo_fix_continuous_action-325000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cfbee7af52631e16a894b77170824297f11be877 Binary files /dev/null and b/ppo_fix_continuous_action-325000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-330000.cleanrl_model b/ppo_fix_continuous_action-330000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d345558283c41029078e9fca50595e81a3c8d234 Binary files /dev/null and b/ppo_fix_continuous_action-330000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-335000.cleanrl_model b/ppo_fix_continuous_action-335000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ab33ef50b7e3f1d33180abb9f0501cb00570b4e8 Binary files /dev/null and b/ppo_fix_continuous_action-335000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-340000.cleanrl_model b/ppo_fix_continuous_action-340000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2f406a5e876448d978e2ac7ffabd94fef504340e Binary files /dev/null and b/ppo_fix_continuous_action-340000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-345000.cleanrl_model b/ppo_fix_continuous_action-345000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..00a0013acd974a305c0365144b582c8b9bda4f7f Binary files /dev/null and b/ppo_fix_continuous_action-345000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-35000.cleanrl_model b/ppo_fix_continuous_action-35000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..942569c91d17fb2af7e69838c6176ebd1825f0c0 Binary files /dev/null and b/ppo_fix_continuous_action-35000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-350000.cleanrl_model b/ppo_fix_continuous_action-350000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f68b531a14c7617c86766a62832cb2225977160a Binary files /dev/null and b/ppo_fix_continuous_action-350000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-355000.cleanrl_model b/ppo_fix_continuous_action-355000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0b7bba2d0cd2a8aa2e262cd2cf05c634281f3c59 Binary files /dev/null and b/ppo_fix_continuous_action-355000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-360000.cleanrl_model b/ppo_fix_continuous_action-360000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a11e61412f4206cb777759043586bc9c4840d056 Binary files /dev/null and b/ppo_fix_continuous_action-360000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-365000.cleanrl_model b/ppo_fix_continuous_action-365000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a558a9ee39c1afd4919c8ade07555d7d5ff69b66 Binary files /dev/null and b/ppo_fix_continuous_action-365000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-370000.cleanrl_model b/ppo_fix_continuous_action-370000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..64827d27547574a97a098867df3448a17ce8ceb2 Binary files /dev/null and b/ppo_fix_continuous_action-370000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-375000.cleanrl_model b/ppo_fix_continuous_action-375000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e9d971cfc1a0b3cd18172976521d301c20e95b40 Binary files /dev/null and b/ppo_fix_continuous_action-375000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-380000.cleanrl_model b/ppo_fix_continuous_action-380000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c091c78b87da2423253bb95ab979e51558dcf2a0 Binary files /dev/null and b/ppo_fix_continuous_action-380000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-385000.cleanrl_model b/ppo_fix_continuous_action-385000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0b3531ef1f64cd43960e777eb30661e7f01724b6 Binary files /dev/null and b/ppo_fix_continuous_action-385000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-390000.cleanrl_model b/ppo_fix_continuous_action-390000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..651d767da647665540fdb4c0618ee0feb7f0af20 Binary files /dev/null and b/ppo_fix_continuous_action-390000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-395000.cleanrl_model b/ppo_fix_continuous_action-395000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4f37b7574c56cd15847a154dbec528c410c63471 Binary files /dev/null and b/ppo_fix_continuous_action-395000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-40000.cleanrl_model b/ppo_fix_continuous_action-40000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..609453d07731a30dc62c8548fdf2e4e17cffd7ee Binary files /dev/null and b/ppo_fix_continuous_action-40000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-400000.cleanrl_model b/ppo_fix_continuous_action-400000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..aa682e74f1cc237abf7721a871ad748e9af54e22 Binary files /dev/null and b/ppo_fix_continuous_action-400000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-405000.cleanrl_model b/ppo_fix_continuous_action-405000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9a7051a53887f866a581249ee92ed5fa5dec0a22 Binary files /dev/null and b/ppo_fix_continuous_action-405000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-410000.cleanrl_model b/ppo_fix_continuous_action-410000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bb7293ecc8ec668c0066a860e92d167cc32d07e9 Binary files /dev/null and b/ppo_fix_continuous_action-410000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-415000.cleanrl_model b/ppo_fix_continuous_action-415000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6f63803c6951672798f092a825e5985062d43de8 Binary files /dev/null and b/ppo_fix_continuous_action-415000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-420000.cleanrl_model b/ppo_fix_continuous_action-420000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..10a2970bdb9379f6f34124dd26be0468d36d961a Binary files /dev/null and b/ppo_fix_continuous_action-420000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-425000.cleanrl_model b/ppo_fix_continuous_action-425000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..dda9bd2fc0a20b2cc2bf01b9334b68cad14c9b90 Binary files /dev/null and b/ppo_fix_continuous_action-425000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-430000.cleanrl_model b/ppo_fix_continuous_action-430000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f4587c0860d64c14087e461189baaf58727facbe Binary files /dev/null and b/ppo_fix_continuous_action-430000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-435000.cleanrl_model b/ppo_fix_continuous_action-435000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6aab7abc92bd27ac81a070a18c60e7206cf771d1 Binary files /dev/null and b/ppo_fix_continuous_action-435000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-440000.cleanrl_model b/ppo_fix_continuous_action-440000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cdcccf2531991aa760ece7b2ee09a06189df0f79 Binary files /dev/null and b/ppo_fix_continuous_action-440000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-445000.cleanrl_model b/ppo_fix_continuous_action-445000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c96eeedfcb61a67c910e2b5e69c08f30e35ed791 Binary files /dev/null and b/ppo_fix_continuous_action-445000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-45000.cleanrl_model b/ppo_fix_continuous_action-45000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..609a36e6fcc957e9e1bad1dfdcaf61eaf098d8c0 Binary files /dev/null and b/ppo_fix_continuous_action-45000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-450000.cleanrl_model b/ppo_fix_continuous_action-450000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..42c6ee849dbce52419c454a1ce36067ba5d2c245 Binary files /dev/null and b/ppo_fix_continuous_action-450000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-455000.cleanrl_model b/ppo_fix_continuous_action-455000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e7ce63c95e32eff483707f4bb0aa254b9fb3f387 Binary files /dev/null and b/ppo_fix_continuous_action-455000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-460000.cleanrl_model b/ppo_fix_continuous_action-460000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..09bdc57473bc52ec81a3c0196e6f81490b3e337c Binary files /dev/null and b/ppo_fix_continuous_action-460000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-465000.cleanrl_model b/ppo_fix_continuous_action-465000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3e5d421ba47c836f61dd3a5242c425df5cbbe939 Binary files /dev/null and b/ppo_fix_continuous_action-465000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-470000.cleanrl_model b/ppo_fix_continuous_action-470000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ed445f82cc0f21373a4ae0b20b7bf0ea554f4b19 Binary files /dev/null and b/ppo_fix_continuous_action-470000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-475000.cleanrl_model b/ppo_fix_continuous_action-475000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..393001b182931dac3e59b78c03daa52275821f0e Binary files /dev/null and b/ppo_fix_continuous_action-475000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-480000.cleanrl_model b/ppo_fix_continuous_action-480000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..74ed07092abc6e715437c15a87b7ba02b4c57529 Binary files /dev/null and b/ppo_fix_continuous_action-480000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-485000.cleanrl_model b/ppo_fix_continuous_action-485000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8091dfab898c2e941a2458456260c89ad5e45415 Binary files /dev/null and b/ppo_fix_continuous_action-485000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-490000.cleanrl_model b/ppo_fix_continuous_action-490000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..29aff7da360c37163344361601a43ac0908c3115 Binary files /dev/null and b/ppo_fix_continuous_action-490000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-495000.cleanrl_model b/ppo_fix_continuous_action-495000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..80ca978c38b40eb2cf05e76561dc5f1ac1127e01 Binary files /dev/null and b/ppo_fix_continuous_action-495000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-5000.cleanrl_model b/ppo_fix_continuous_action-5000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d45957b50482a7889d3182aad8cc486b62925c86 Binary files /dev/null and b/ppo_fix_continuous_action-5000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-50000.cleanrl_model b/ppo_fix_continuous_action-50000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bd6d52eafc71da9ab00b81eb5f6fa7dbe9959b62 Binary files /dev/null and b/ppo_fix_continuous_action-50000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-500000.cleanrl_model b/ppo_fix_continuous_action-500000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..13653125479670958e8b937bd99e575123832308 Binary files /dev/null and b/ppo_fix_continuous_action-500000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-505000.cleanrl_model b/ppo_fix_continuous_action-505000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a5ff2067cfc50a849f5bfd8f424281013aa419d2 Binary files /dev/null and b/ppo_fix_continuous_action-505000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-510000.cleanrl_model b/ppo_fix_continuous_action-510000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b6a57e1e0d481bad48ec7753c7b4c9fa51882fb8 Binary files /dev/null and b/ppo_fix_continuous_action-510000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-515000.cleanrl_model b/ppo_fix_continuous_action-515000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..24ecdd23438f095beccbd98b860ea21de3187169 Binary files /dev/null and b/ppo_fix_continuous_action-515000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-520000.cleanrl_model b/ppo_fix_continuous_action-520000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e7e572ccfa293ebe6152cc4cbd4982fd842c921d Binary files /dev/null and b/ppo_fix_continuous_action-520000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-525000.cleanrl_model b/ppo_fix_continuous_action-525000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5cbd74774d31044cf2d31e14aed78638787ec7db Binary files /dev/null and b/ppo_fix_continuous_action-525000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-530000.cleanrl_model b/ppo_fix_continuous_action-530000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7870d09c4d4e9ef4512dcc7970ad2386519aaeb8 Binary files /dev/null and b/ppo_fix_continuous_action-530000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-535000.cleanrl_model b/ppo_fix_continuous_action-535000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d347052b32252787f1bd1597372a027e7e18403c Binary files /dev/null and b/ppo_fix_continuous_action-535000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-540000.cleanrl_model b/ppo_fix_continuous_action-540000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..80f56634c4c202565ce83388f6fa2f7881681a27 Binary files /dev/null and b/ppo_fix_continuous_action-540000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-545000.cleanrl_model b/ppo_fix_continuous_action-545000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b919f30bee223b2407f3b57ba5cf4bb63cd1bc7f Binary files /dev/null and b/ppo_fix_continuous_action-545000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-55000.cleanrl_model b/ppo_fix_continuous_action-55000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..51689d80f93e5111eede1f90c522f08bc5072236 Binary files /dev/null and b/ppo_fix_continuous_action-55000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-550000.cleanrl_model b/ppo_fix_continuous_action-550000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4f66cf7de2e12d764599f9a3c2171ceb7d8acbfb Binary files /dev/null and b/ppo_fix_continuous_action-550000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-555000.cleanrl_model b/ppo_fix_continuous_action-555000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cd627cdb758a6313e6fca76f392f4b53438c14e7 Binary files /dev/null and b/ppo_fix_continuous_action-555000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-560000.cleanrl_model b/ppo_fix_continuous_action-560000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..78b93ab34b7e13872a33f49677b83e8fa5c8f5ee Binary files /dev/null and b/ppo_fix_continuous_action-560000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-565000.cleanrl_model b/ppo_fix_continuous_action-565000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8164576f48694ef71143103a1c9fa1e0ba35de53 Binary files /dev/null and b/ppo_fix_continuous_action-565000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-570000.cleanrl_model b/ppo_fix_continuous_action-570000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fcbcc439f7e6a99fb3000e2f472327315a3892a5 Binary files /dev/null and b/ppo_fix_continuous_action-570000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-575000.cleanrl_model b/ppo_fix_continuous_action-575000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..123d673036cabd00d3cadf5f4c6ad0b83c2bb0c7 Binary files /dev/null and b/ppo_fix_continuous_action-575000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-580000.cleanrl_model b/ppo_fix_continuous_action-580000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8c58bc5b4753ab766c057633f930dd33ef2a5bbc Binary files /dev/null and b/ppo_fix_continuous_action-580000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-585000.cleanrl_model b/ppo_fix_continuous_action-585000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0f076d194fbda3bc1406167c0e5c94102560e27f Binary files /dev/null and b/ppo_fix_continuous_action-585000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-590000.cleanrl_model b/ppo_fix_continuous_action-590000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..096c01889f7a2774a688e1738825faeea221d753 Binary files /dev/null and b/ppo_fix_continuous_action-590000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-595000.cleanrl_model b/ppo_fix_continuous_action-595000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ba35e1a6bd9db5363a474f466b14949ced6737b4 Binary files /dev/null and b/ppo_fix_continuous_action-595000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-60000.cleanrl_model b/ppo_fix_continuous_action-60000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..865e6b249a1931fbb99416db551bbb3dfe6c5868 Binary files /dev/null and b/ppo_fix_continuous_action-60000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-600000.cleanrl_model b/ppo_fix_continuous_action-600000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f8e199916c6a7169d3ae290c0aa277b56b09dd77 Binary files /dev/null and b/ppo_fix_continuous_action-600000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-605000.cleanrl_model b/ppo_fix_continuous_action-605000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e8130d7fb55d4c062f28577b535a78cc014f18c9 Binary files /dev/null and b/ppo_fix_continuous_action-605000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-610000.cleanrl_model b/ppo_fix_continuous_action-610000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fbd771fc8c92296e7dead3d064f5c3622cea962d Binary files /dev/null and b/ppo_fix_continuous_action-610000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-615000.cleanrl_model b/ppo_fix_continuous_action-615000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6d6707cc628aacbfc7e721ae13cf9574d6431162 Binary files /dev/null and b/ppo_fix_continuous_action-615000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-620000.cleanrl_model b/ppo_fix_continuous_action-620000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bc8962cd314fde5341cd959162623f9e9c84cadf Binary files /dev/null and b/ppo_fix_continuous_action-620000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-625000.cleanrl_model b/ppo_fix_continuous_action-625000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8d057204faf361eb149fd2287e7354736f792e7d Binary files /dev/null and b/ppo_fix_continuous_action-625000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-630000.cleanrl_model b/ppo_fix_continuous_action-630000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d901c199b0c5f3bd0fecc6e2eb3b8b7c532e3d8b Binary files /dev/null and b/ppo_fix_continuous_action-630000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-635000.cleanrl_model b/ppo_fix_continuous_action-635000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2d8ebad6871df51b1e70342c728b329165880021 Binary files /dev/null and b/ppo_fix_continuous_action-635000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-640000.cleanrl_model b/ppo_fix_continuous_action-640000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d94bc8e3577db2cd6a1fab181e5ffd7495c7dfcf Binary files /dev/null and b/ppo_fix_continuous_action-640000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-645000.cleanrl_model b/ppo_fix_continuous_action-645000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7c31eaefe041629768a6aded5acfc71f1915958d Binary files /dev/null and b/ppo_fix_continuous_action-645000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-65000.cleanrl_model b/ppo_fix_continuous_action-65000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7b28d75fb8308be8add976b3088dbd743bc61be6 Binary files /dev/null and b/ppo_fix_continuous_action-65000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-650000.cleanrl_model b/ppo_fix_continuous_action-650000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..29e62f2e99566af96ebdc6c2f17efc08b5c952bb Binary files /dev/null and b/ppo_fix_continuous_action-650000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-655000.cleanrl_model b/ppo_fix_continuous_action-655000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..69d2545bbb16e4f0fff5445feb9f5f36abf0300c Binary files /dev/null and b/ppo_fix_continuous_action-655000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-660000.cleanrl_model b/ppo_fix_continuous_action-660000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..32736304b6b294c950fa82bbd4ce541ef16f0bf0 Binary files /dev/null and b/ppo_fix_continuous_action-660000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-665000.cleanrl_model b/ppo_fix_continuous_action-665000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e70b08c9dc95e4afce63c987aec9067a2bc79ba8 Binary files /dev/null and b/ppo_fix_continuous_action-665000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-670000.cleanrl_model b/ppo_fix_continuous_action-670000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..22606855071a14b0f0eea3fdd58da37fa02a70cb Binary files /dev/null and b/ppo_fix_continuous_action-670000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-675000.cleanrl_model b/ppo_fix_continuous_action-675000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..98fee71ba1f27bd4b7214f8168e00ce16574ed3d Binary files /dev/null and b/ppo_fix_continuous_action-675000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-680000.cleanrl_model b/ppo_fix_continuous_action-680000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5dd447afc357a27df43c4a16dba5c37974c9a4b8 Binary files /dev/null and b/ppo_fix_continuous_action-680000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-685000.cleanrl_model b/ppo_fix_continuous_action-685000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..552876a99c6b7efb34bf6240742ff57bbb6d42da Binary files /dev/null and b/ppo_fix_continuous_action-685000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-690000.cleanrl_model b/ppo_fix_continuous_action-690000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..47fb6cbfc343593206024cd50d9fa04e6e72f0ec Binary files /dev/null and b/ppo_fix_continuous_action-690000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-695000.cleanrl_model b/ppo_fix_continuous_action-695000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8188a76be3eabe86007f3488be42e32e0d5e981f Binary files /dev/null and b/ppo_fix_continuous_action-695000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-70000.cleanrl_model b/ppo_fix_continuous_action-70000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..065aa3a8ed2cc929d5a76f19152daf6a0424b314 Binary files /dev/null and b/ppo_fix_continuous_action-70000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-700000.cleanrl_model b/ppo_fix_continuous_action-700000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..51a2f50b28891c7ee213ee4789cba8c6832c220c Binary files /dev/null and b/ppo_fix_continuous_action-700000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-705000.cleanrl_model b/ppo_fix_continuous_action-705000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..da71e39b4250a2feee8efcdfae54174169af9fa8 Binary files /dev/null and b/ppo_fix_continuous_action-705000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-710000.cleanrl_model b/ppo_fix_continuous_action-710000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8496ba6df2a018f300c6fccb0e6bc1f5bbc0cd91 Binary files /dev/null and b/ppo_fix_continuous_action-710000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-715000.cleanrl_model b/ppo_fix_continuous_action-715000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1ea2e7e87dc9a1e8b506e766b5cdf13ccd49deef Binary files /dev/null and b/ppo_fix_continuous_action-715000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-720000.cleanrl_model b/ppo_fix_continuous_action-720000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bb71f6ca70e7cbd2e1312cdc015e6e17b070e1ed Binary files /dev/null and b/ppo_fix_continuous_action-720000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-725000.cleanrl_model b/ppo_fix_continuous_action-725000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8bd543797adfd8b028406ce200124e472be1e86e Binary files /dev/null and b/ppo_fix_continuous_action-725000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-730000.cleanrl_model b/ppo_fix_continuous_action-730000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..36009f435c6d39dd7c866d5c8afe1294e667e8f6 Binary files /dev/null and b/ppo_fix_continuous_action-730000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-735000.cleanrl_model b/ppo_fix_continuous_action-735000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ceb3da4cabc0df901f52322c1a4bd6ef1c093756 Binary files /dev/null and b/ppo_fix_continuous_action-735000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-740000.cleanrl_model b/ppo_fix_continuous_action-740000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6c9d65f02b641d2b6c39aff516c9206c2bacb10a Binary files /dev/null and b/ppo_fix_continuous_action-740000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-745000.cleanrl_model b/ppo_fix_continuous_action-745000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..74509520134c6f2b0e389203ff0fe3eb8cef1426 Binary files /dev/null and b/ppo_fix_continuous_action-745000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-75000.cleanrl_model b/ppo_fix_continuous_action-75000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..43c7b5c7ce800dac14a356d1d89ffffc7496cb5f Binary files /dev/null and b/ppo_fix_continuous_action-75000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-750000.cleanrl_model b/ppo_fix_continuous_action-750000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2864dba0fad4863a00ab3c9ab6b5c66ef9bdecd7 Binary files /dev/null and b/ppo_fix_continuous_action-750000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-755000.cleanrl_model b/ppo_fix_continuous_action-755000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..607c950f498df1b68879406c9c02eb6adc678322 Binary files /dev/null and b/ppo_fix_continuous_action-755000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-760000.cleanrl_model b/ppo_fix_continuous_action-760000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7124bd9b2c728f9f70fe674f5cfb24d20aa80f9f Binary files /dev/null and b/ppo_fix_continuous_action-760000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-765000.cleanrl_model b/ppo_fix_continuous_action-765000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1bd841f82f4f26ff3a0b98fc7fb41bb4f994b53e Binary files /dev/null and b/ppo_fix_continuous_action-765000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-770000.cleanrl_model b/ppo_fix_continuous_action-770000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b8c13109946308039b5e6b0031d40cf3d6742a1a Binary files /dev/null and b/ppo_fix_continuous_action-770000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-775000.cleanrl_model b/ppo_fix_continuous_action-775000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..488b9888a0a35320518925cf6a5ba9b0dfcb2cd2 Binary files /dev/null and b/ppo_fix_continuous_action-775000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-780000.cleanrl_model b/ppo_fix_continuous_action-780000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a46b6491078c2b3d033ae516ffabe886e7ea818f Binary files /dev/null and b/ppo_fix_continuous_action-780000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-785000.cleanrl_model b/ppo_fix_continuous_action-785000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..468b10c04d196c87f407fb4f7804f84b8dace74f Binary files /dev/null and b/ppo_fix_continuous_action-785000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-790000.cleanrl_model b/ppo_fix_continuous_action-790000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7483e25c7ee9d8d6800da9f77934e014ff1c7e88 Binary files /dev/null and b/ppo_fix_continuous_action-790000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-795000.cleanrl_model b/ppo_fix_continuous_action-795000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9e74d83740b8f9347108fb30a2053e730208133a Binary files /dev/null and b/ppo_fix_continuous_action-795000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-80000.cleanrl_model b/ppo_fix_continuous_action-80000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fbd5328335f82fd0d2be22d1030599454a7f8d32 Binary files /dev/null and b/ppo_fix_continuous_action-80000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-800000.cleanrl_model b/ppo_fix_continuous_action-800000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..29c47dc863849516c6df63f8a8bf4d25d1239538 Binary files /dev/null and b/ppo_fix_continuous_action-800000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-805000.cleanrl_model b/ppo_fix_continuous_action-805000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..89ed7d86ecab3f805aab1a6ee3890a0bf9eef299 Binary files /dev/null and b/ppo_fix_continuous_action-805000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-810000.cleanrl_model b/ppo_fix_continuous_action-810000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6f62a25bfaa38bd41f21e3c16f5fe42b8e484c5f Binary files /dev/null and b/ppo_fix_continuous_action-810000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-815000.cleanrl_model b/ppo_fix_continuous_action-815000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..55c860e6ccc00682c17a622a61da7f7dc9dc627e Binary files /dev/null and b/ppo_fix_continuous_action-815000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-820000.cleanrl_model b/ppo_fix_continuous_action-820000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ed82e66e34bd8b7923fc4401315c6afb982405f9 Binary files /dev/null and b/ppo_fix_continuous_action-820000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-825000.cleanrl_model b/ppo_fix_continuous_action-825000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..71821f60d00b91886b44dcc5e38c8fc77872116a Binary files /dev/null and b/ppo_fix_continuous_action-825000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-830000.cleanrl_model b/ppo_fix_continuous_action-830000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f99bdb65f4f80e947245e99bc781a02abbd1434a Binary files /dev/null and b/ppo_fix_continuous_action-830000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-835000.cleanrl_model b/ppo_fix_continuous_action-835000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2c7f19f75babbf111e76c6c8fef28fc8d4f39d4f Binary files /dev/null and b/ppo_fix_continuous_action-835000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-840000.cleanrl_model b/ppo_fix_continuous_action-840000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5eeec61c2616480454b283fa40a3f4a31419fdfb Binary files /dev/null and b/ppo_fix_continuous_action-840000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-845000.cleanrl_model b/ppo_fix_continuous_action-845000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5f0666e870675ba185db33660d153ff524c453cd Binary files /dev/null and b/ppo_fix_continuous_action-845000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-85000.cleanrl_model b/ppo_fix_continuous_action-85000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..eb88b720db805fda00123043e3431b3b480a4aed Binary files /dev/null and b/ppo_fix_continuous_action-85000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-850000.cleanrl_model b/ppo_fix_continuous_action-850000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9f26f6e0834b82a6a0879981f504dba6aae267de Binary files /dev/null and b/ppo_fix_continuous_action-850000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-855000.cleanrl_model b/ppo_fix_continuous_action-855000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..aea54ea83d481cb976ecf180b4f9220c6a06baf2 Binary files /dev/null and b/ppo_fix_continuous_action-855000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-860000.cleanrl_model b/ppo_fix_continuous_action-860000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ce0563cf334c9aaadf2037eff9c66e9e5b45b5eb Binary files /dev/null and b/ppo_fix_continuous_action-860000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-865000.cleanrl_model b/ppo_fix_continuous_action-865000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..059dcea633bb33f70841edd6b7a964cf8d9ca19e Binary files /dev/null and b/ppo_fix_continuous_action-865000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-870000.cleanrl_model b/ppo_fix_continuous_action-870000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..00fdec74d65debab09b89d9b813b2e1683ebebbb Binary files /dev/null and b/ppo_fix_continuous_action-870000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-875000.cleanrl_model b/ppo_fix_continuous_action-875000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1971b77c69e7d33b2d8cf09fbd7c515a9f07908b Binary files /dev/null and b/ppo_fix_continuous_action-875000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-880000.cleanrl_model b/ppo_fix_continuous_action-880000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a9486f3415621cd4eb4989f6f6113ff42be2cc9f Binary files /dev/null and b/ppo_fix_continuous_action-880000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-885000.cleanrl_model b/ppo_fix_continuous_action-885000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e72f4fe800149f78a5d70e2aff5d4f2ec5cddf45 Binary files /dev/null and b/ppo_fix_continuous_action-885000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-890000.cleanrl_model b/ppo_fix_continuous_action-890000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1ef2fb56f294dbd0a96b45f51a1cd1f2f21776fc Binary files /dev/null and b/ppo_fix_continuous_action-890000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-895000.cleanrl_model b/ppo_fix_continuous_action-895000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ffe2b6859884ad683d521270f2c96068dec3e940 Binary files /dev/null and b/ppo_fix_continuous_action-895000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-90000.cleanrl_model b/ppo_fix_continuous_action-90000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5e1ed18d295e94bee78ccf06d4a1d3a05842ce7d Binary files /dev/null and b/ppo_fix_continuous_action-90000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-900000.cleanrl_model b/ppo_fix_continuous_action-900000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c5f00c2d10cd4b81178f43519257564340d93e86 Binary files /dev/null and b/ppo_fix_continuous_action-900000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-905000.cleanrl_model b/ppo_fix_continuous_action-905000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bb87650f4ea81c56179b23e074831de6f21e4320 Binary files /dev/null and b/ppo_fix_continuous_action-905000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-910000.cleanrl_model b/ppo_fix_continuous_action-910000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..36f516c0392bd029e089c75ea8ba4db7b4ef8e86 Binary files /dev/null and b/ppo_fix_continuous_action-910000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-915000.cleanrl_model b/ppo_fix_continuous_action-915000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a50922467a3e9b71e8448fda33b1159e6db7d4c9 Binary files /dev/null and b/ppo_fix_continuous_action-915000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-920000.cleanrl_model b/ppo_fix_continuous_action-920000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..11f38f7742d32e2b0ed638fe111b821be86a3849 Binary files /dev/null and b/ppo_fix_continuous_action-920000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-925000.cleanrl_model b/ppo_fix_continuous_action-925000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..01b6e88263ab59f391d4476dff46f91b7a30f82f Binary files /dev/null and b/ppo_fix_continuous_action-925000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-930000.cleanrl_model b/ppo_fix_continuous_action-930000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e7c95b062a6cd7f90b390e132fe0a522ce97367e Binary files /dev/null and b/ppo_fix_continuous_action-930000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-935000.cleanrl_model b/ppo_fix_continuous_action-935000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..27f77e9ee1ed255c10e997c2cbf564c72cfcc7d0 Binary files /dev/null and b/ppo_fix_continuous_action-935000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-940000.cleanrl_model b/ppo_fix_continuous_action-940000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..76c3bd810cc9311724a2cb67fc40dd849f4cb506 Binary files /dev/null and b/ppo_fix_continuous_action-940000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-945000.cleanrl_model b/ppo_fix_continuous_action-945000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0f67995063f0b116a823fab112ba118a20297577 Binary files /dev/null and b/ppo_fix_continuous_action-945000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-95000.cleanrl_model b/ppo_fix_continuous_action-95000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a9d3887b5238fa0b75b67de1b9ad2b0c0828191d Binary files /dev/null and b/ppo_fix_continuous_action-95000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-950000.cleanrl_model b/ppo_fix_continuous_action-950000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..456704eb2623acead9570d77ff207160d37fdb98 Binary files /dev/null and b/ppo_fix_continuous_action-950000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-955000.cleanrl_model b/ppo_fix_continuous_action-955000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e48dd768cdc0ad6ec38f6b41bf77576683a8d2f3 Binary files /dev/null and b/ppo_fix_continuous_action-955000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-960000.cleanrl_model b/ppo_fix_continuous_action-960000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..60089119f9ae51e0dae4b0912e1f62f02b9a9238 Binary files /dev/null and b/ppo_fix_continuous_action-960000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-965000.cleanrl_model b/ppo_fix_continuous_action-965000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7ecca11fdc918e8c2eadbe0fa75eb89d1a9c9c43 Binary files /dev/null and b/ppo_fix_continuous_action-965000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-970000.cleanrl_model b/ppo_fix_continuous_action-970000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cd557efec375ba530de12195316b0196e30ff1db Binary files /dev/null and b/ppo_fix_continuous_action-970000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-975000.cleanrl_model b/ppo_fix_continuous_action-975000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ef257025f7e1e9de616236824b6ee44c8eb3d13d Binary files /dev/null and b/ppo_fix_continuous_action-975000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-980000.cleanrl_model b/ppo_fix_continuous_action-980000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..26c4f1c0303868cc1dec11754e42b24d36562c42 Binary files /dev/null and b/ppo_fix_continuous_action-980000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-985000.cleanrl_model b/ppo_fix_continuous_action-985000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f36ef0fbed426ccb2ce67727a821d1e3ccce4447 Binary files /dev/null and b/ppo_fix_continuous_action-985000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-990000.cleanrl_model b/ppo_fix_continuous_action-990000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3ac32b9d6963cb64229f9015acb975fae4d750d1 Binary files /dev/null and b/ppo_fix_continuous_action-990000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-995000.cleanrl_model b/ppo_fix_continuous_action-995000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1ecfb4df5be54f3ca299d2e0659864477d14736a Binary files /dev/null and b/ppo_fix_continuous_action-995000.cleanrl_model differ diff --git a/ppo_fix_continuous_action.cleanrl_model b/ppo_fix_continuous_action.cleanrl_model index 9c0035047899236d4858a72e03468385ca54f63c..46105ed3ea6b9b8abcea0e4a2999617100f2547b 100644 Binary files a/ppo_fix_continuous_action.cleanrl_model and b/ppo_fix_continuous_action.cleanrl_model differ diff --git a/ppo_fix_continuous_action.py b/ppo_fix_continuous_action.py index f204e4e83e13f5da11c5e6e100efdc9b35ea4674..0d9b7c61f4d8f53a5f7c70e7065fa07fe9ad7a57 100644 --- a/ppo_fix_continuous_action.py +++ b/ppo_fix_continuous_action.py @@ -229,7 +229,7 @@ def evaluate( envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, capture_video, run_name, agent.obs_rms)]) obs, _ = envs.reset() - episodic_returns = [] + episodic_returns, episodic_lengths = [], [] while len(episodic_returns) < eval_episodes: actions, _, _, _ = agent.get_action_and_value(torch.Tensor(obs).to(device)) next_obs, _, _, _, infos = envs.step(actions.cpu().numpy()) @@ -239,9 +239,10 @@ def evaluate( continue print(f"eval_episode={len(episodic_returns)}, episodic_return={info['episode']['r']}") episodic_returns += [info["episode"]["r"]] + episodic_lengths += [info["episode"]["l"]] obs = next_obs - return episodic_returns + return episodic_returns, episodic_lengths def make_env(env_id, idx, capture_video, run_name, gamma): @@ -436,6 +437,28 @@ if __name__ == "__main__": terminal_value = agent.get_value(torch.Tensor(real_next_obs).to(device)).reshape(1, -1)[0][0] rewards[step][idx] += args.gamma * terminal_value + if global_step % (5000 // args.num_envs * args.num_envs) == 0: + obs_rms, return_rms = get_rms(envs.envs[0]) + agent.obs_rms = copy.deepcopy(get_rms(envs.envs[0])[0]) + model_path = f"runs/{run_name}/{args.exp_name}-{global_step}.cleanrl_model" + torch.save(agent.state_dict(), model_path) + print(f"model saved to {model_path}") + + episodic_returns, episodic_lengths = evaluate( + model_path, + make_eval_env, + args.env_id, + eval_episodes=3, + run_name=f"{run_name}-eval", + Model=Agent, + device=device, + capture_video=False, + ) + + print(episodic_returns, episodic_lengths) + writer.add_scalar("charts/eval/episodic_return", np.mean(episodic_returns), global_step) + writer.add_scalar("charts/eval/episodic_length", np.mean(episodic_lengths), global_step) + # Only print when at least 1 env is done if "final_info" not in infos: continue @@ -549,7 +572,7 @@ if __name__ == "__main__": torch.save(agent.state_dict(), model_path) print(f"model saved to {model_path}") - episodic_returns = evaluate( + episodic_returns, episodic_lengths = evaluate( model_path, make_eval_env, args.env_id, diff --git a/replay.mp4 b/replay.mp4 index 7238459123c1d55a06ac5ac8ca03eebd74b433c5..30316213d06575d3a4402d31369c94b9863c327c 100644 Binary files a/replay.mp4 and b/replay.mp4 differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705702812-eval/rl-video-episode-0.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705702812-eval/rl-video-episode-0.mp4 deleted file mode 100644 index 4f7657aab70e51fc85aee4ed2737475699bfb9f5..0000000000000000000000000000000000000000 Binary files a/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705702812-eval/rl-video-episode-0.mp4 and /dev/null differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705702812-eval/rl-video-episode-1.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705702812-eval/rl-video-episode-1.mp4 deleted file mode 100644 index 0879291856508ec9c6b8d21ad9c72b1d0b3cf792..0000000000000000000000000000000000000000 Binary files a/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705702812-eval/rl-video-episode-1.mp4 and /dev/null differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705702812-eval/rl-video-episode-8.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705702812-eval/rl-video-episode-8.mp4 deleted file mode 100644 index 7238459123c1d55a06ac5ac8ca03eebd74b433c5..0000000000000000000000000000000000000000 Binary files a/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705702812-eval/rl-video-episode-8.mp4 and /dev/null differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705733766-eval/rl-video-episode-0.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705733766-eval/rl-video-episode-0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..36ee580b3f61296e09a9a9138a0aaf2d0bafd7f8 Binary files /dev/null and b/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705733766-eval/rl-video-episode-0.mp4 differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705733766-eval/rl-video-episode-1.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705733766-eval/rl-video-episode-1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2ba60ad9adf0c08e51fed25e1625be88a313917b Binary files /dev/null and b/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705733766-eval/rl-video-episode-1.mp4 differ diff --git a/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705733766-eval/rl-video-episode-8.mp4 b/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705733766-eval/rl-video-episode-8.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..30316213d06575d3a4402d31369c94b9863c327c Binary files /dev/null and b/videos/Humanoid-v4__ppo_fix_continuous_action__4__1705733766-eval/rl-video-episode-8.mp4 differ