diff --git a/README.md b/README.md index a4fe59a6da262227a62fd8100761a83bfacb4611..1b95a21d7067cd824ef45c90793eb6fa9d67c973 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ model-index: type: Swimmer-v4 metrics: - type: mean_reward - value: 98.10 +/- 18.12 + value: 64.02 +/- 0.90 name: mean_reward verified: false --- diff --git a/events.out.tfevents.1705695057.3090-172.2567669.0 b/events.out.tfevents.1705695057.3090-172.2567669.0 deleted file mode 100644 index 771ee60b1a66c46b9e73f8c181f0104ffe538ebb..0000000000000000000000000000000000000000 --- a/events.out.tfevents.1705695057.3090-172.2567669.0 +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:441321d22eee6cc2feea5bf95f547dd9f99d117ea9dd8756cd1be81af84e8f89 -size 376390 diff --git a/events.out.tfevents.1705726200.4090-171.2583907.0 b/events.out.tfevents.1705726200.4090-171.2583907.0 new file mode 100644 index 0000000000000000000000000000000000000000..875995e5df2517e0a9bc08fbfc42af2d6241fa1b --- /dev/null +++ b/events.out.tfevents.1705726200.4090-171.2583907.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff2bdaf73ca0965a91e398cfba47f114a98b34227751a3acc4280dc74b813388 +size 403050 diff --git a/ppo_fix_continuous_action-10000.cleanrl_model b/ppo_fix_continuous_action-10000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a3a952d30ff5fb2da33be09a8c05bd477974563c Binary files /dev/null and b/ppo_fix_continuous_action-10000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-100000.cleanrl_model b/ppo_fix_continuous_action-100000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f83d15bf88f2a7ab08083132406b54f5f59b1d3e Binary files /dev/null and b/ppo_fix_continuous_action-100000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-105000.cleanrl_model b/ppo_fix_continuous_action-105000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bc54659f0f3f6755fd3e88ca873d084529fec861 Binary files /dev/null and b/ppo_fix_continuous_action-105000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-110000.cleanrl_model b/ppo_fix_continuous_action-110000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3128ca04fe675c468d5ea13f040f0088686c44eb Binary files /dev/null and b/ppo_fix_continuous_action-110000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-115000.cleanrl_model b/ppo_fix_continuous_action-115000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3e5e30e06aa811699c37272d9a4b3b25c8462a0d Binary files /dev/null and b/ppo_fix_continuous_action-115000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-120000.cleanrl_model b/ppo_fix_continuous_action-120000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..04d2320f540635d8ac7432a2fec4f1156ccdc188 Binary files /dev/null and b/ppo_fix_continuous_action-120000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-125000.cleanrl_model b/ppo_fix_continuous_action-125000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..30d369ce780d1a236b698f44bb013e40baf6fac2 Binary files /dev/null and b/ppo_fix_continuous_action-125000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-130000.cleanrl_model b/ppo_fix_continuous_action-130000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fd6bc126535d1ed2d5c5af83c3f3f6448c8daebf Binary files /dev/null and b/ppo_fix_continuous_action-130000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-135000.cleanrl_model b/ppo_fix_continuous_action-135000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8be072fed3d0cef2c58f25ec6d95343445840fb3 Binary files /dev/null and b/ppo_fix_continuous_action-135000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-140000.cleanrl_model b/ppo_fix_continuous_action-140000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4fcbad3f1e91fedc81e628c03acd693da646a266 Binary files /dev/null and b/ppo_fix_continuous_action-140000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-145000.cleanrl_model b/ppo_fix_continuous_action-145000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..29158105e40d0878027004d79fdfa470311154b9 Binary files /dev/null and b/ppo_fix_continuous_action-145000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-15000.cleanrl_model b/ppo_fix_continuous_action-15000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d63ff72354ffb526ff8eeb086e3e4dca808a95cd Binary files /dev/null and b/ppo_fix_continuous_action-15000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-150000.cleanrl_model b/ppo_fix_continuous_action-150000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..354af7d3a5bba07d35c5536e97a163b0c64e5e6b Binary files /dev/null and b/ppo_fix_continuous_action-150000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-155000.cleanrl_model b/ppo_fix_continuous_action-155000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..45731382d8d5286962a2f8b5888cbc7d23e5ba3a Binary files /dev/null and b/ppo_fix_continuous_action-155000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-160000.cleanrl_model b/ppo_fix_continuous_action-160000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d442913304a54396625cd4fd73739643383d7fde Binary files /dev/null and b/ppo_fix_continuous_action-160000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-165000.cleanrl_model b/ppo_fix_continuous_action-165000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f25cdac910181e96908c92daa3dc75d6d8db775e Binary files /dev/null and b/ppo_fix_continuous_action-165000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-170000.cleanrl_model b/ppo_fix_continuous_action-170000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..662ddb3859ac1578ecdd0f837e53db731be5e35a Binary files /dev/null and b/ppo_fix_continuous_action-170000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-175000.cleanrl_model b/ppo_fix_continuous_action-175000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..76631f984c2d8165722521ac8027737b9e072041 Binary files /dev/null and b/ppo_fix_continuous_action-175000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-180000.cleanrl_model b/ppo_fix_continuous_action-180000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2176fa8951f1ac12cc328f66069871746bd8e5b8 Binary files /dev/null and b/ppo_fix_continuous_action-180000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-185000.cleanrl_model b/ppo_fix_continuous_action-185000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..508be8fcb63830dd61f92028118f913b25d0d35f Binary files /dev/null and b/ppo_fix_continuous_action-185000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-190000.cleanrl_model b/ppo_fix_continuous_action-190000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9a14d6ad323e04708d4913bdb40d23a81d5c5cff Binary files /dev/null and b/ppo_fix_continuous_action-190000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-195000.cleanrl_model b/ppo_fix_continuous_action-195000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..322151814326862af13337386bac6f38b7a935b8 Binary files /dev/null and b/ppo_fix_continuous_action-195000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-20000.cleanrl_model b/ppo_fix_continuous_action-20000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..86cb320d7bad8d3d38b3a65758efad3ccb44eb0a Binary files /dev/null and b/ppo_fix_continuous_action-20000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-200000.cleanrl_model b/ppo_fix_continuous_action-200000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7d337600e272a26e7d4c08bd75c54afc2ad786c3 Binary files /dev/null and b/ppo_fix_continuous_action-200000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-205000.cleanrl_model b/ppo_fix_continuous_action-205000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c5dff8b0bf4b0573bc73b6576691c1740ce4900c Binary files /dev/null and b/ppo_fix_continuous_action-205000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-210000.cleanrl_model b/ppo_fix_continuous_action-210000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2b31aebf0cc941057cf8fe42672d95691a5cc51f Binary files /dev/null and b/ppo_fix_continuous_action-210000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-215000.cleanrl_model b/ppo_fix_continuous_action-215000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..039c507766a5bf8338d355c9acc6306d5cbb05b3 Binary files /dev/null and b/ppo_fix_continuous_action-215000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-220000.cleanrl_model b/ppo_fix_continuous_action-220000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ebd281675f9cc4df583c2b64f158cbd3f438b8aa Binary files /dev/null and b/ppo_fix_continuous_action-220000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-225000.cleanrl_model b/ppo_fix_continuous_action-225000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7b579743ee447877f5cf0e1e48d566b9ddd3126a Binary files /dev/null and b/ppo_fix_continuous_action-225000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-230000.cleanrl_model b/ppo_fix_continuous_action-230000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..398e301b39c9aed18e1742a7299230fa8f97898a Binary files /dev/null and b/ppo_fix_continuous_action-230000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-235000.cleanrl_model b/ppo_fix_continuous_action-235000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0277c192250b7871731d3015f730cd9d5f82d1b6 Binary files /dev/null and b/ppo_fix_continuous_action-235000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-240000.cleanrl_model b/ppo_fix_continuous_action-240000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3d58de078830eeb766a924764d7cefd6fc41c3e1 Binary files /dev/null and b/ppo_fix_continuous_action-240000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-245000.cleanrl_model b/ppo_fix_continuous_action-245000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..586925e4fcef6ed62b64d3040ded468f75cedd1d Binary files /dev/null and b/ppo_fix_continuous_action-245000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-25000.cleanrl_model b/ppo_fix_continuous_action-25000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a5c089938e8341467e3999871f58436d6866a136 Binary files /dev/null and b/ppo_fix_continuous_action-25000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-250000.cleanrl_model b/ppo_fix_continuous_action-250000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ddaa8af996624cbc363d6bf1be5d4ec7559f5d83 Binary files /dev/null and b/ppo_fix_continuous_action-250000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-255000.cleanrl_model b/ppo_fix_continuous_action-255000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5261e08a9ca1c978ae53b2f9721bc46354eb368a Binary files /dev/null and b/ppo_fix_continuous_action-255000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-260000.cleanrl_model b/ppo_fix_continuous_action-260000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e2659add6d80605974835b8bc0dc2d9cf682668f Binary files /dev/null and b/ppo_fix_continuous_action-260000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-265000.cleanrl_model b/ppo_fix_continuous_action-265000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..71f83c262918f6b2eef82ea073be477c7709f076 Binary files /dev/null and b/ppo_fix_continuous_action-265000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-270000.cleanrl_model b/ppo_fix_continuous_action-270000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..419e32515dc45fd5900c586141b8ddb698e076cb Binary files /dev/null and b/ppo_fix_continuous_action-270000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-275000.cleanrl_model b/ppo_fix_continuous_action-275000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..382777a8b506f8752efe55a91cf05c5653d0a898 Binary files /dev/null and b/ppo_fix_continuous_action-275000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-280000.cleanrl_model b/ppo_fix_continuous_action-280000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fca60271739a29116ecf3ac48b9be4c9bc7c5742 Binary files /dev/null and b/ppo_fix_continuous_action-280000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-285000.cleanrl_model b/ppo_fix_continuous_action-285000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1ed4cefdbee68de8c0932ca8739636fc36c63d4c Binary files /dev/null and b/ppo_fix_continuous_action-285000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-290000.cleanrl_model b/ppo_fix_continuous_action-290000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d1acecd053f31266aa1c3423b6784d934429d965 Binary files /dev/null and b/ppo_fix_continuous_action-290000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-295000.cleanrl_model b/ppo_fix_continuous_action-295000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5d4631e0a3719f96436c6bd9c50a834853c3feaa Binary files /dev/null and b/ppo_fix_continuous_action-295000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-30000.cleanrl_model b/ppo_fix_continuous_action-30000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8386367db657b1309c8367c4bf1c8154bea45e00 Binary files /dev/null and b/ppo_fix_continuous_action-30000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-300000.cleanrl_model b/ppo_fix_continuous_action-300000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2babb4ab38b27075548a80c1770ba223d0c5b3af Binary files /dev/null and b/ppo_fix_continuous_action-300000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-305000.cleanrl_model b/ppo_fix_continuous_action-305000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f03b5ea31e4a95a442de480dc3a605e143388b39 Binary files /dev/null and b/ppo_fix_continuous_action-305000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-310000.cleanrl_model b/ppo_fix_continuous_action-310000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..715a954087bd6ad5776a6a408c12d7611a747904 Binary files /dev/null and b/ppo_fix_continuous_action-310000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-315000.cleanrl_model b/ppo_fix_continuous_action-315000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..45cd33542dec89242359399570ea27833b557dca Binary files /dev/null and b/ppo_fix_continuous_action-315000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-320000.cleanrl_model b/ppo_fix_continuous_action-320000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b50dcffcddca62ece0eab499a31a0d8caf515f11 Binary files /dev/null and b/ppo_fix_continuous_action-320000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-325000.cleanrl_model b/ppo_fix_continuous_action-325000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ba92a1f7dff18abf24fbdc52822ab28127ca7d4f Binary files /dev/null and b/ppo_fix_continuous_action-325000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-330000.cleanrl_model b/ppo_fix_continuous_action-330000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..da1fcec9633b9caf5b3b7620726be2324a0321c9 Binary files /dev/null and b/ppo_fix_continuous_action-330000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-335000.cleanrl_model b/ppo_fix_continuous_action-335000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4c45564b3d043309b874e66adb187903e3635567 Binary files /dev/null and b/ppo_fix_continuous_action-335000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-340000.cleanrl_model b/ppo_fix_continuous_action-340000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a39740b1a3707131f81bf59371ce94618564f207 Binary files /dev/null and b/ppo_fix_continuous_action-340000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-345000.cleanrl_model b/ppo_fix_continuous_action-345000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..933872e8603fb567c58ab9aa630c7ca3fa56b3c3 Binary files /dev/null and b/ppo_fix_continuous_action-345000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-35000.cleanrl_model b/ppo_fix_continuous_action-35000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..410939c31edddca02f939851875a9a06326ade29 Binary files /dev/null and b/ppo_fix_continuous_action-35000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-350000.cleanrl_model b/ppo_fix_continuous_action-350000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9726561417cd2ce27f95538fa06282a5463c6150 Binary files /dev/null and b/ppo_fix_continuous_action-350000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-355000.cleanrl_model b/ppo_fix_continuous_action-355000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3ceb6821cf4b0ee0aa6418644fd0b99da252b08c Binary files /dev/null and b/ppo_fix_continuous_action-355000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-360000.cleanrl_model b/ppo_fix_continuous_action-360000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b0e7ede2aefcc730475c1805297e0a67f70772cb Binary files /dev/null and b/ppo_fix_continuous_action-360000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-365000.cleanrl_model b/ppo_fix_continuous_action-365000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..72f19efee0319e7047fbbed69247064b2a0616d8 Binary files /dev/null and b/ppo_fix_continuous_action-365000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-370000.cleanrl_model b/ppo_fix_continuous_action-370000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8a570a7b50e0e627d9427dc716c681b03df37e24 Binary files /dev/null and b/ppo_fix_continuous_action-370000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-375000.cleanrl_model b/ppo_fix_continuous_action-375000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..98f8b78a8b82f64972ff62206bf74e9527ee9fc8 Binary files /dev/null and b/ppo_fix_continuous_action-375000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-380000.cleanrl_model b/ppo_fix_continuous_action-380000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..27d78a6ef597ec044157cd4aa83fd32ecf5d5c1a Binary files /dev/null and b/ppo_fix_continuous_action-380000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-385000.cleanrl_model b/ppo_fix_continuous_action-385000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ec866c0e07e047c21a41e50c13ea42439045fd0f Binary files /dev/null and b/ppo_fix_continuous_action-385000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-390000.cleanrl_model b/ppo_fix_continuous_action-390000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..40912e53822722a96059a94068b204092db1f021 Binary files /dev/null and b/ppo_fix_continuous_action-390000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-395000.cleanrl_model b/ppo_fix_continuous_action-395000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..118fd57888feae37a4ae09f560bd428e93479dac Binary files /dev/null and b/ppo_fix_continuous_action-395000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-40000.cleanrl_model b/ppo_fix_continuous_action-40000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0b1e2dff0d2e0e0f9a14c3e9282c1a03329c95d9 Binary files /dev/null and b/ppo_fix_continuous_action-40000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-400000.cleanrl_model b/ppo_fix_continuous_action-400000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..53e8d30491d2ced3030a3fd48cb3a5b0b7823753 Binary files /dev/null and b/ppo_fix_continuous_action-400000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-405000.cleanrl_model b/ppo_fix_continuous_action-405000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0f5d8fdd9589cc36e340ff4c3338e01c1db5215b Binary files /dev/null and b/ppo_fix_continuous_action-405000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-410000.cleanrl_model b/ppo_fix_continuous_action-410000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f53c9380da220e1240954be867fce63af81992d1 Binary files /dev/null and b/ppo_fix_continuous_action-410000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-415000.cleanrl_model b/ppo_fix_continuous_action-415000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..887807d88e59d239e5bb003bafc19b5afd0100ae Binary files /dev/null and b/ppo_fix_continuous_action-415000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-420000.cleanrl_model b/ppo_fix_continuous_action-420000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d6f42b5c75d605a77118fd7248ac0533972dc045 Binary files /dev/null and b/ppo_fix_continuous_action-420000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-425000.cleanrl_model b/ppo_fix_continuous_action-425000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ce05c0ffae6c3d340157215b47e0e8c8ca66175d Binary files /dev/null and b/ppo_fix_continuous_action-425000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-430000.cleanrl_model b/ppo_fix_continuous_action-430000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..091807f47b413119fdc4073127643724464ed1ee Binary files /dev/null and b/ppo_fix_continuous_action-430000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-435000.cleanrl_model b/ppo_fix_continuous_action-435000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fdff1f9645414ca8d95470fc8cc723de400d053d Binary files /dev/null and b/ppo_fix_continuous_action-435000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-440000.cleanrl_model b/ppo_fix_continuous_action-440000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..491ef79e9ed288c6a7f59c0494ca7cac8f7a147f Binary files /dev/null and b/ppo_fix_continuous_action-440000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-445000.cleanrl_model b/ppo_fix_continuous_action-445000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f840f416b6fc19734035be0ee34cc49c56a59037 Binary files /dev/null and b/ppo_fix_continuous_action-445000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-45000.cleanrl_model b/ppo_fix_continuous_action-45000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e59c8be8c4146b93dab7939f69da41457445d302 Binary files /dev/null and b/ppo_fix_continuous_action-45000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-450000.cleanrl_model b/ppo_fix_continuous_action-450000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e2872c2164c1e9d484dfb4269f46b4a09ef8a2a0 Binary files /dev/null and b/ppo_fix_continuous_action-450000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-455000.cleanrl_model b/ppo_fix_continuous_action-455000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c7c735e99570e9a9d777e5a129f65fdf18fb0fd4 Binary files /dev/null and b/ppo_fix_continuous_action-455000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-460000.cleanrl_model b/ppo_fix_continuous_action-460000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8277dcb6fb150815e760dfdf50f58c410dbaf185 Binary files /dev/null and b/ppo_fix_continuous_action-460000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-465000.cleanrl_model b/ppo_fix_continuous_action-465000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5a725193129c8ffb18488a59c1b7566b8a74db43 Binary files /dev/null and b/ppo_fix_continuous_action-465000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-470000.cleanrl_model b/ppo_fix_continuous_action-470000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ac7d97d9aacd23ffabdcf29a5fb2f68938873ec7 Binary files /dev/null and b/ppo_fix_continuous_action-470000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-475000.cleanrl_model b/ppo_fix_continuous_action-475000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..64fed6a0c24ce1a882f78af416effdc5c1a90869 Binary files /dev/null and b/ppo_fix_continuous_action-475000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-480000.cleanrl_model b/ppo_fix_continuous_action-480000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0a76a874ba67d1b347aa860464990aaec0db23d6 Binary files /dev/null and b/ppo_fix_continuous_action-480000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-485000.cleanrl_model b/ppo_fix_continuous_action-485000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..989035eeb4101dc5723a4b0ad90f2a572abd74fe Binary files /dev/null and b/ppo_fix_continuous_action-485000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-490000.cleanrl_model b/ppo_fix_continuous_action-490000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..470906831281711491eb494add059873dcea7d08 Binary files /dev/null and b/ppo_fix_continuous_action-490000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-495000.cleanrl_model b/ppo_fix_continuous_action-495000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bec47c444ea6c9d8b8baafac81ef032525a4f664 Binary files /dev/null and b/ppo_fix_continuous_action-495000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-5000.cleanrl_model b/ppo_fix_continuous_action-5000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..41e4e8b5ff9fbf0227684497c9372fe975375c57 Binary files /dev/null and b/ppo_fix_continuous_action-5000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-50000.cleanrl_model b/ppo_fix_continuous_action-50000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5a4b31199660dd24ef3080c60912f40d95058bb5 Binary files /dev/null and b/ppo_fix_continuous_action-50000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-500000.cleanrl_model b/ppo_fix_continuous_action-500000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..dd3cbdd30ce499cea532012bd219f0a41f1b130b Binary files /dev/null and b/ppo_fix_continuous_action-500000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-505000.cleanrl_model b/ppo_fix_continuous_action-505000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..87b1e6a4d4238b6e22956b6a982cda20a7144bbb Binary files /dev/null and b/ppo_fix_continuous_action-505000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-510000.cleanrl_model b/ppo_fix_continuous_action-510000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a949f3ef575e16d87988b16cd23aca6fe6aaafb4 Binary files /dev/null and b/ppo_fix_continuous_action-510000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-515000.cleanrl_model b/ppo_fix_continuous_action-515000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6f34a4fc2cd0756ce03666821bd9d028b265e260 Binary files /dev/null and b/ppo_fix_continuous_action-515000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-520000.cleanrl_model b/ppo_fix_continuous_action-520000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..11281cd817e060d326b6445bf1284be478f7658f Binary files /dev/null and b/ppo_fix_continuous_action-520000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-525000.cleanrl_model b/ppo_fix_continuous_action-525000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2e4af97a3f1515a016c8950e6b6328df33f242e4 Binary files /dev/null and b/ppo_fix_continuous_action-525000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-530000.cleanrl_model b/ppo_fix_continuous_action-530000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bbb8eb997dc7689a21b0903ee074353d75c05c6c Binary files /dev/null and b/ppo_fix_continuous_action-530000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-535000.cleanrl_model b/ppo_fix_continuous_action-535000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7b74f9ea448621ed8742a49aafd38abf26d3bbb5 Binary files /dev/null and b/ppo_fix_continuous_action-535000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-540000.cleanrl_model b/ppo_fix_continuous_action-540000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d7aec688308ca22ba248b5cbaee58f8bcfa13328 Binary files /dev/null and b/ppo_fix_continuous_action-540000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-545000.cleanrl_model b/ppo_fix_continuous_action-545000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c9955c0fb17d587fc25780d371f8d20651241ee4 Binary files /dev/null and b/ppo_fix_continuous_action-545000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-55000.cleanrl_model b/ppo_fix_continuous_action-55000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bce90afd1796d6f8fb9dd3d4d662a3a09978ae72 Binary files /dev/null and b/ppo_fix_continuous_action-55000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-550000.cleanrl_model b/ppo_fix_continuous_action-550000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..036b28c47178e90b0b631b0cba66d8a2515919c0 Binary files /dev/null and b/ppo_fix_continuous_action-550000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-555000.cleanrl_model b/ppo_fix_continuous_action-555000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7198b1f5ed48e399dcee7232ed99364065198e76 Binary files /dev/null and b/ppo_fix_continuous_action-555000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-560000.cleanrl_model b/ppo_fix_continuous_action-560000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..97cf7e9ffebd84d6a2af22adb672ce0c92d1b756 Binary files /dev/null and b/ppo_fix_continuous_action-560000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-565000.cleanrl_model b/ppo_fix_continuous_action-565000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5a14da2c9e60ed8ed57d864b867b62baac495352 Binary files /dev/null and b/ppo_fix_continuous_action-565000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-570000.cleanrl_model b/ppo_fix_continuous_action-570000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..99f58cf5e72113d7eb8c9f1d8dbefb306c84f842 Binary files /dev/null and b/ppo_fix_continuous_action-570000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-575000.cleanrl_model b/ppo_fix_continuous_action-575000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f3af9af0a66f3d044149b7a0dc5c1ca478374586 Binary files /dev/null and b/ppo_fix_continuous_action-575000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-580000.cleanrl_model b/ppo_fix_continuous_action-580000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4c6ff373dac7eb3a39eca0102b65a1e29ecd22d1 Binary files /dev/null and b/ppo_fix_continuous_action-580000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-585000.cleanrl_model b/ppo_fix_continuous_action-585000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3fe98b7bc5844da9303dc5d917e6cc43307df1f0 Binary files /dev/null and b/ppo_fix_continuous_action-585000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-590000.cleanrl_model b/ppo_fix_continuous_action-590000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2f6ca59b8b5bf83cc78a9a3e04b6fdcf4c397b37 Binary files /dev/null and b/ppo_fix_continuous_action-590000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-595000.cleanrl_model b/ppo_fix_continuous_action-595000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c620875aae7ee9fd44091f1ba38ea78d425d815a Binary files /dev/null and b/ppo_fix_continuous_action-595000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-60000.cleanrl_model b/ppo_fix_continuous_action-60000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..33ec96853d2b9650b937412cd9e10d367c3b5ed2 Binary files /dev/null and b/ppo_fix_continuous_action-60000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-600000.cleanrl_model b/ppo_fix_continuous_action-600000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d1e9b5dfa25e81c13909acf63dd0c8575215c6f9 Binary files /dev/null and b/ppo_fix_continuous_action-600000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-605000.cleanrl_model b/ppo_fix_continuous_action-605000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..560bd0549c2b5206d6aa67c8b84a9389fdb17283 Binary files /dev/null and b/ppo_fix_continuous_action-605000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-610000.cleanrl_model b/ppo_fix_continuous_action-610000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..61df7e24ef60fb19b41a8f4319dbc8cc6d66aae8 Binary files /dev/null and b/ppo_fix_continuous_action-610000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-615000.cleanrl_model b/ppo_fix_continuous_action-615000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..24a81b5ebd9239523baae0ebb5b2c6d10fde941e Binary files /dev/null and b/ppo_fix_continuous_action-615000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-620000.cleanrl_model b/ppo_fix_continuous_action-620000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9bb9fabc47a6f74d6af051aa1ead713ad2f51df4 Binary files /dev/null and b/ppo_fix_continuous_action-620000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-625000.cleanrl_model b/ppo_fix_continuous_action-625000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3f9459ca940468952addaa7bdb98f28cf4def357 Binary files /dev/null and b/ppo_fix_continuous_action-625000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-630000.cleanrl_model b/ppo_fix_continuous_action-630000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..821c356761cfeccc1cab990a0ba1bf5d5a0589a5 Binary files /dev/null and b/ppo_fix_continuous_action-630000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-635000.cleanrl_model b/ppo_fix_continuous_action-635000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b921fdc15c4d8832ea5ffece62671c5b7956b9a8 Binary files /dev/null and b/ppo_fix_continuous_action-635000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-640000.cleanrl_model b/ppo_fix_continuous_action-640000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..08cbd24a77a49cc8f911318e422bd51f9f2ae956 Binary files /dev/null and b/ppo_fix_continuous_action-640000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-645000.cleanrl_model b/ppo_fix_continuous_action-645000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..142a9e453b14307fe64a9a589c372f3c95f15d18 Binary files /dev/null and b/ppo_fix_continuous_action-645000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-65000.cleanrl_model b/ppo_fix_continuous_action-65000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6d7b81fe5fd91c0a9d4193fc20704cc4508da73b Binary files /dev/null and b/ppo_fix_continuous_action-65000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-650000.cleanrl_model b/ppo_fix_continuous_action-650000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..be5de2a841ce3d1f3babbe3623e1776352d44027 Binary files /dev/null and b/ppo_fix_continuous_action-650000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-655000.cleanrl_model b/ppo_fix_continuous_action-655000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fbafb681e6781d9a229e29c0bd43e3efa3a8cacf Binary files /dev/null and b/ppo_fix_continuous_action-655000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-660000.cleanrl_model b/ppo_fix_continuous_action-660000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ed6d747f85ec8cdfca8b97fae86272882b792997 Binary files /dev/null and b/ppo_fix_continuous_action-660000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-665000.cleanrl_model b/ppo_fix_continuous_action-665000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e17b1f5e9756071a9de4ab4ad404414b14238d12 Binary files /dev/null and b/ppo_fix_continuous_action-665000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-670000.cleanrl_model b/ppo_fix_continuous_action-670000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ed4ba6f0736500d63296875cdc3dad62b06362ed Binary files /dev/null and b/ppo_fix_continuous_action-670000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-675000.cleanrl_model b/ppo_fix_continuous_action-675000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..dcae57b4268a1b075148b51f2e72404995a9150c Binary files /dev/null and b/ppo_fix_continuous_action-675000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-680000.cleanrl_model b/ppo_fix_continuous_action-680000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..74df6bb24bfc3aa7ebfe51d24f65421b34832e48 Binary files /dev/null and b/ppo_fix_continuous_action-680000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-685000.cleanrl_model b/ppo_fix_continuous_action-685000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0d879c62731a72ff5ff9c586724fee19d2e43fb1 Binary files /dev/null and b/ppo_fix_continuous_action-685000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-690000.cleanrl_model b/ppo_fix_continuous_action-690000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1aaba556707a40103ea80a09bb88f5a40fa7c42d Binary files /dev/null and b/ppo_fix_continuous_action-690000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-695000.cleanrl_model b/ppo_fix_continuous_action-695000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..15046b23dd995eff1d8dff28fe2bf6ef9c639959 Binary files /dev/null and b/ppo_fix_continuous_action-695000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-70000.cleanrl_model b/ppo_fix_continuous_action-70000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7a0818d2188ea0c083f5ae39e325a4ccde4e0057 Binary files /dev/null and b/ppo_fix_continuous_action-70000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-700000.cleanrl_model b/ppo_fix_continuous_action-700000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..63824b5c1c1b8ccb5890c63a6722d2b9a019d94c Binary files /dev/null and b/ppo_fix_continuous_action-700000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-705000.cleanrl_model b/ppo_fix_continuous_action-705000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e491950f61dddf4722763cbcb763279dc984ea68 Binary files /dev/null and b/ppo_fix_continuous_action-705000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-710000.cleanrl_model b/ppo_fix_continuous_action-710000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9dbc39e69e55f3cd5056494b33267ee132410916 Binary files /dev/null and b/ppo_fix_continuous_action-710000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-715000.cleanrl_model b/ppo_fix_continuous_action-715000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..057dde5aef93f4d73cf97a2e17f755f355c20a5b Binary files /dev/null and b/ppo_fix_continuous_action-715000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-720000.cleanrl_model b/ppo_fix_continuous_action-720000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..b4f7a5e98e78190367c487aad26206a7354a8639 Binary files /dev/null and b/ppo_fix_continuous_action-720000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-725000.cleanrl_model b/ppo_fix_continuous_action-725000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3e4832b60148fbcdf300d9a9b5f69c9846625643 Binary files /dev/null and b/ppo_fix_continuous_action-725000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-730000.cleanrl_model b/ppo_fix_continuous_action-730000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..90f5fd9768db75626e9e40457ce54856e11a6493 Binary files /dev/null and b/ppo_fix_continuous_action-730000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-735000.cleanrl_model b/ppo_fix_continuous_action-735000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9aa5ae7c6e68e842f9be185dd7ee9cd29e0a4d3f Binary files /dev/null and b/ppo_fix_continuous_action-735000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-740000.cleanrl_model b/ppo_fix_continuous_action-740000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..463b49ac9e636e255543d017d87b5bda9d25898a Binary files /dev/null and b/ppo_fix_continuous_action-740000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-745000.cleanrl_model b/ppo_fix_continuous_action-745000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d208479a069399aafc21113a4e334a25e46583f2 Binary files /dev/null and b/ppo_fix_continuous_action-745000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-75000.cleanrl_model b/ppo_fix_continuous_action-75000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..d82c920a75fd76bca5673c564e35ac2ce9d07fb4 Binary files /dev/null and b/ppo_fix_continuous_action-75000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-750000.cleanrl_model b/ppo_fix_continuous_action-750000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..512f0ca8951d41f85df835f0c5b33071fa278308 Binary files /dev/null and b/ppo_fix_continuous_action-750000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-755000.cleanrl_model b/ppo_fix_continuous_action-755000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4be9383296c3bee68f36a53ff606a529b0eb191b Binary files /dev/null and b/ppo_fix_continuous_action-755000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-760000.cleanrl_model b/ppo_fix_continuous_action-760000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..ef530e0431be31d7ecdd1102b5c3da13a3cfa639 Binary files /dev/null and b/ppo_fix_continuous_action-760000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-765000.cleanrl_model b/ppo_fix_continuous_action-765000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3a989a1d78280e82bfd337a58b752ac40364f7cc Binary files /dev/null and b/ppo_fix_continuous_action-765000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-770000.cleanrl_model b/ppo_fix_continuous_action-770000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..156426c5ae984c81a1bfb895af385390603be1df Binary files /dev/null and b/ppo_fix_continuous_action-770000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-775000.cleanrl_model b/ppo_fix_continuous_action-775000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..456d9beb3d9a97baa20309bcd7bbc29f8e53fd93 Binary files /dev/null and b/ppo_fix_continuous_action-775000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-780000.cleanrl_model b/ppo_fix_continuous_action-780000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..3cb768cf783035b14d821cbc6983dd7b5deb783a Binary files /dev/null and b/ppo_fix_continuous_action-780000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-785000.cleanrl_model b/ppo_fix_continuous_action-785000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..621f050b000fbd269e59bc94dbfb11b1087e594a Binary files /dev/null and b/ppo_fix_continuous_action-785000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-790000.cleanrl_model b/ppo_fix_continuous_action-790000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5a0fc115ae46b51ed831c6133590b4722f5a258a Binary files /dev/null and b/ppo_fix_continuous_action-790000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-795000.cleanrl_model b/ppo_fix_continuous_action-795000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..493c9056a840fb386505a1bf714fcd20255296eb Binary files /dev/null and b/ppo_fix_continuous_action-795000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-80000.cleanrl_model b/ppo_fix_continuous_action-80000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..10e0a877fa7cdc3b5bf0fd7e4cdaf0a9e00978e5 Binary files /dev/null and b/ppo_fix_continuous_action-80000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-800000.cleanrl_model b/ppo_fix_continuous_action-800000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0f0b7ce654743c4529f264289966f2c1a56c87ba Binary files /dev/null and b/ppo_fix_continuous_action-800000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-805000.cleanrl_model b/ppo_fix_continuous_action-805000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..03bdf171713237b79d23a85e7f4e55800e5ca326 Binary files /dev/null and b/ppo_fix_continuous_action-805000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-810000.cleanrl_model b/ppo_fix_continuous_action-810000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..61b7ef15052af73ab13a8940c52bf82b953bd5f8 Binary files /dev/null and b/ppo_fix_continuous_action-810000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-815000.cleanrl_model b/ppo_fix_continuous_action-815000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e5a975ceef65d93f6c9a9a99fc6c27194efe9308 Binary files /dev/null and b/ppo_fix_continuous_action-815000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-820000.cleanrl_model b/ppo_fix_continuous_action-820000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9e5616e3f1b63378116256e57ee5b809389af330 Binary files /dev/null and b/ppo_fix_continuous_action-820000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-825000.cleanrl_model b/ppo_fix_continuous_action-825000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..2446c41b73c42c1bdeaeabe1b8d94f738a1b759b Binary files /dev/null and b/ppo_fix_continuous_action-825000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-830000.cleanrl_model b/ppo_fix_continuous_action-830000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..da73c1bf1dfa6a0f0e0560a6086ff716cfee1e12 Binary files /dev/null and b/ppo_fix_continuous_action-830000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-835000.cleanrl_model b/ppo_fix_continuous_action-835000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fd6e9919013ab4b8be983769f4a4d7c5996a6eeb Binary files /dev/null and b/ppo_fix_continuous_action-835000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-840000.cleanrl_model b/ppo_fix_continuous_action-840000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0786b5f2f804e1953e7d9ae1c194935992d0abae Binary files /dev/null and b/ppo_fix_continuous_action-840000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-845000.cleanrl_model b/ppo_fix_continuous_action-845000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9d9309865d68eb56ce810b9cbbc9d1039f95cd74 Binary files /dev/null and b/ppo_fix_continuous_action-845000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-85000.cleanrl_model b/ppo_fix_continuous_action-85000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..73e8e30d8b4e5054f5827fcbb59412d7a3d91cf5 Binary files /dev/null and b/ppo_fix_continuous_action-85000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-850000.cleanrl_model b/ppo_fix_continuous_action-850000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..19fbc1262f75f6478747e41cc92a9f01e94ea3d3 Binary files /dev/null and b/ppo_fix_continuous_action-850000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-855000.cleanrl_model b/ppo_fix_continuous_action-855000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e18ec19f2b8b034d8951b1b0dfeaaeb63b7336d8 Binary files /dev/null and b/ppo_fix_continuous_action-855000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-860000.cleanrl_model b/ppo_fix_continuous_action-860000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1656aeede3caa83459d051b73adfdce62bb6dfc4 Binary files /dev/null and b/ppo_fix_continuous_action-860000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-865000.cleanrl_model b/ppo_fix_continuous_action-865000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..bd953c5b8df5c5725216f1c3cb4050c5b500fda4 Binary files /dev/null and b/ppo_fix_continuous_action-865000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-870000.cleanrl_model b/ppo_fix_continuous_action-870000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a735fccc6eef659d3d8b15320632be276a46e2a6 Binary files /dev/null and b/ppo_fix_continuous_action-870000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-875000.cleanrl_model b/ppo_fix_continuous_action-875000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8a7be9e5ad50dbf68dd125b51692cc6efc07d987 Binary files /dev/null and b/ppo_fix_continuous_action-875000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-880000.cleanrl_model b/ppo_fix_continuous_action-880000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..df0d80b884f85e639bb63e90c8acb2726b7e5cda Binary files /dev/null and b/ppo_fix_continuous_action-880000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-885000.cleanrl_model b/ppo_fix_continuous_action-885000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..cf0db45a9c86959150425f3dd7b57e5f8f9ebf68 Binary files /dev/null and b/ppo_fix_continuous_action-885000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-890000.cleanrl_model b/ppo_fix_continuous_action-890000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..f510812ba5f4f50c0f28fff52467c36aab2a26e5 Binary files /dev/null and b/ppo_fix_continuous_action-890000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-895000.cleanrl_model b/ppo_fix_continuous_action-895000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9bb0a91faae70693a5542fa838a79c8e4ec2f94e Binary files /dev/null and b/ppo_fix_continuous_action-895000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-90000.cleanrl_model b/ppo_fix_continuous_action-90000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..5c85431f1ff03209ae203a6b3bcdb1c20e6a5177 Binary files /dev/null and b/ppo_fix_continuous_action-90000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-900000.cleanrl_model b/ppo_fix_continuous_action-900000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..73f33fbb70ca20432ffeaf26a8fffe209f511423 Binary files /dev/null and b/ppo_fix_continuous_action-900000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-905000.cleanrl_model b/ppo_fix_continuous_action-905000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..e4c4709ca84f7bdfeb445ddc247c44deb32a9f7d Binary files /dev/null and b/ppo_fix_continuous_action-905000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-910000.cleanrl_model b/ppo_fix_continuous_action-910000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..1c1c919d9d67855131bbaea91926c524035cd8a1 Binary files /dev/null and b/ppo_fix_continuous_action-910000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-915000.cleanrl_model b/ppo_fix_continuous_action-915000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..c62d64313185cdb42ffa70e73ead5d1481a02dd5 Binary files /dev/null and b/ppo_fix_continuous_action-915000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-920000.cleanrl_model b/ppo_fix_continuous_action-920000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..30bd8c04f173bb0911cea0ab879621697ea59b19 Binary files /dev/null and b/ppo_fix_continuous_action-920000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-925000.cleanrl_model b/ppo_fix_continuous_action-925000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fcb5867363b6fd754829c9caefcc419ca58e60e0 Binary files /dev/null and b/ppo_fix_continuous_action-925000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-930000.cleanrl_model b/ppo_fix_continuous_action-930000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..a4d87a33f8e08a5e57abf39a374bd444d6c82889 Binary files /dev/null and b/ppo_fix_continuous_action-930000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-935000.cleanrl_model b/ppo_fix_continuous_action-935000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0f99ef3a26a4f74481510e52167199c607a5b0f0 Binary files /dev/null and b/ppo_fix_continuous_action-935000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-940000.cleanrl_model b/ppo_fix_continuous_action-940000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..7a1e0dc6afe702774f4660f5da1a9179889816e1 Binary files /dev/null and b/ppo_fix_continuous_action-940000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-945000.cleanrl_model b/ppo_fix_continuous_action-945000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..0a58ab60d1c85ccc85f6c76244a1d805bbcc62d1 Binary files /dev/null and b/ppo_fix_continuous_action-945000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-95000.cleanrl_model b/ppo_fix_continuous_action-95000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..47da06acc5b04df0c7858bb69afaf23419973ecd Binary files /dev/null and b/ppo_fix_continuous_action-95000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-950000.cleanrl_model b/ppo_fix_continuous_action-950000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..8756e1c9e63d0bb1bdcfb992b07a5c8181278b5e Binary files /dev/null and b/ppo_fix_continuous_action-950000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-955000.cleanrl_model b/ppo_fix_continuous_action-955000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..9fa0f68a0cd388db7a0b663e781dea66e560ab63 Binary files /dev/null and b/ppo_fix_continuous_action-955000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-960000.cleanrl_model b/ppo_fix_continuous_action-960000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..83d3ea9db4dea0fa3963178361300af71dd8a7b3 Binary files /dev/null and b/ppo_fix_continuous_action-960000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-965000.cleanrl_model b/ppo_fix_continuous_action-965000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..886f073a656fe0711b2f9d496efb8ac6e89986a0 Binary files /dev/null and b/ppo_fix_continuous_action-965000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-970000.cleanrl_model b/ppo_fix_continuous_action-970000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6ea56b6d469874ee404f39c328ea9d0656c1bc5a Binary files /dev/null and b/ppo_fix_continuous_action-970000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-975000.cleanrl_model b/ppo_fix_continuous_action-975000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..272c51fd270af5073ddfe69be5c547c56182fce8 Binary files /dev/null and b/ppo_fix_continuous_action-975000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-980000.cleanrl_model b/ppo_fix_continuous_action-980000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..6137812267af456ff87c0c0bfd22a9a554a40039 Binary files /dev/null and b/ppo_fix_continuous_action-980000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-985000.cleanrl_model b/ppo_fix_continuous_action-985000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..fbaeb2a9a959254c614b3e5618655bd178b216e0 Binary files /dev/null and b/ppo_fix_continuous_action-985000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-990000.cleanrl_model b/ppo_fix_continuous_action-990000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..4bb85fe46a0c15f8af0d9c88d1ec8b0b248b20e1 Binary files /dev/null and b/ppo_fix_continuous_action-990000.cleanrl_model differ diff --git a/ppo_fix_continuous_action-995000.cleanrl_model b/ppo_fix_continuous_action-995000.cleanrl_model new file mode 100644 index 0000000000000000000000000000000000000000..12e0c081ab5555f7eea3253b9aec8ebc899e4935 Binary files /dev/null and b/ppo_fix_continuous_action-995000.cleanrl_model differ diff --git a/ppo_fix_continuous_action.cleanrl_model b/ppo_fix_continuous_action.cleanrl_model index c25e2fa6406d854a14c8dbd0415db0d99944c803..65e030619cdecbf4662b28523477406147b8dcd8 100644 Binary files a/ppo_fix_continuous_action.cleanrl_model and b/ppo_fix_continuous_action.cleanrl_model differ diff --git a/ppo_fix_continuous_action.py b/ppo_fix_continuous_action.py index f204e4e83e13f5da11c5e6e100efdc9b35ea4674..0d9b7c61f4d8f53a5f7c70e7065fa07fe9ad7a57 100644 --- a/ppo_fix_continuous_action.py +++ b/ppo_fix_continuous_action.py @@ -229,7 +229,7 @@ def evaluate( envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, capture_video, run_name, agent.obs_rms)]) obs, _ = envs.reset() - episodic_returns = [] + episodic_returns, episodic_lengths = [], [] while len(episodic_returns) < eval_episodes: actions, _, _, _ = agent.get_action_and_value(torch.Tensor(obs).to(device)) next_obs, _, _, _, infos = envs.step(actions.cpu().numpy()) @@ -239,9 +239,10 @@ def evaluate( continue print(f"eval_episode={len(episodic_returns)}, episodic_return={info['episode']['r']}") episodic_returns += [info["episode"]["r"]] + episodic_lengths += [info["episode"]["l"]] obs = next_obs - return episodic_returns + return episodic_returns, episodic_lengths def make_env(env_id, idx, capture_video, run_name, gamma): @@ -436,6 +437,28 @@ if __name__ == "__main__": terminal_value = agent.get_value(torch.Tensor(real_next_obs).to(device)).reshape(1, -1)[0][0] rewards[step][idx] += args.gamma * terminal_value + if global_step % (5000 // args.num_envs * args.num_envs) == 0: + obs_rms, return_rms = get_rms(envs.envs[0]) + agent.obs_rms = copy.deepcopy(get_rms(envs.envs[0])[0]) + model_path = f"runs/{run_name}/{args.exp_name}-{global_step}.cleanrl_model" + torch.save(agent.state_dict(), model_path) + print(f"model saved to {model_path}") + + episodic_returns, episodic_lengths = evaluate( + model_path, + make_eval_env, + args.env_id, + eval_episodes=3, + run_name=f"{run_name}-eval", + Model=Agent, + device=device, + capture_video=False, + ) + + print(episodic_returns, episodic_lengths) + writer.add_scalar("charts/eval/episodic_return", np.mean(episodic_returns), global_step) + writer.add_scalar("charts/eval/episodic_length", np.mean(episodic_lengths), global_step) + # Only print when at least 1 env is done if "final_info" not in infos: continue @@ -549,7 +572,7 @@ if __name__ == "__main__": torch.save(agent.state_dict(), model_path) print(f"model saved to {model_path}") - episodic_returns = evaluate( + episodic_returns, episodic_lengths = evaluate( model_path, make_eval_env, args.env_id, diff --git a/replay.mp4 b/replay.mp4 index a2410c7ca1ef93401bd77f40549ba73cc0cf429f..53c4203d7d43a60a3adb1babb79b6bb181b05056 100644 Binary files a/replay.mp4 and b/replay.mp4 differ diff --git a/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705695050-eval/rl-video-episode-0.mp4 b/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705695050-eval/rl-video-episode-0.mp4 deleted file mode 100644 index 69cded052c777b3ca5cdb0b656183d0fafa25633..0000000000000000000000000000000000000000 Binary files a/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705695050-eval/rl-video-episode-0.mp4 and /dev/null differ diff --git a/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705695050-eval/rl-video-episode-1.mp4 b/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705695050-eval/rl-video-episode-1.mp4 deleted file mode 100644 index c4834c3afe729fbd4fef5908e2bc07bc84dd57b2..0000000000000000000000000000000000000000 Binary files a/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705695050-eval/rl-video-episode-1.mp4 and /dev/null differ diff --git a/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705695050-eval/rl-video-episode-8.mp4 b/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705695050-eval/rl-video-episode-8.mp4 deleted file mode 100644 index a2410c7ca1ef93401bd77f40549ba73cc0cf429f..0000000000000000000000000000000000000000 Binary files a/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705695050-eval/rl-video-episode-8.mp4 and /dev/null differ diff --git a/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705726182-eval/rl-video-episode-0.mp4 b/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705726182-eval/rl-video-episode-0.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..87090adb0fa7f1aa1887a82b57df7d6b46c8ec5d Binary files /dev/null and b/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705726182-eval/rl-video-episode-0.mp4 differ diff --git a/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705726182-eval/rl-video-episode-1.mp4 b/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705726182-eval/rl-video-episode-1.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b809f35ebc5500779851e331edf7a12455865fba Binary files /dev/null and b/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705726182-eval/rl-video-episode-1.mp4 differ diff --git a/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705726182-eval/rl-video-episode-8.mp4 b/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705726182-eval/rl-video-episode-8.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..53c4203d7d43a60a3adb1babb79b6bb181b05056 Binary files /dev/null and b/videos/Swimmer-v4__ppo_fix_continuous_action__2__1705726182-eval/rl-video-episode-8.mp4 differ