File size: 1,882 Bytes
079c32c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
@startuml serial_main
' Sequence diagram of the serial pipeline's main routine: component setup,
' an optional random-collection warm-up, the evaluate/collect/train loop,
' and the final teardown of the components.
header Serial Pipeline
title Serial Main

participant controller
participant env_manager
participant policy
participant learner
participant replay_buffer
participant collector
participant evaluator
participant commander
autonumber

' --- Setup: the controller initialises every component ---
controller -> env_manager: init collector and evaluator env_manager; set seed
controller -> policy: init policy
controller -> learner: init learner; set learn_mode policy
controller -> collector: init collector; set collect_mode policy; set env_manager
controller -> evaluator: init evaluator; set eval_mode policy; set env_manager
controller -> commander: init commander; set command_mode policy
controller -> replay_buffer: init replay_buffer

' --- Optional warm-up: fill the buffer with randomly collected data ---
' Single-branch group, so it is expressed with "opt" rather than "alt".
opt random collect before training starts
    collector -> collector: reset policy to random one; generate random data
    collector -> replay_buffer: push_data
    collector -> collector: reset policy back to the original one
end

learner -> learner: call before_run hook

' --- Main loop: evaluate (when due), collect, then train ---
loop
    commander -> commander: step

    opt this iteration needs evaluation
        evaluator -> evaluator: eval_performance
        ' Genuine two-way choice, so "alt"/"else" is kept here.
        alt reach eval stop_value
            learner -> learner: save checkpoint and exit
        else episode_return is new highest
            learner -> learner: save checkpoint
        end
    end

    collector -> collector: generate data (steps or episodes)
    collector -> replay_buffer: push_data

    loop learner_train_iteration times
        replay_buffer -> learner: sample_data
        learner -> learner: train
        opt replay_buffer uses prioritization
            learner -> replay_buffer: update with priority_info
        end
    end

    opt on_policy training
        replay_buffer -> replay_buffer: clear
    end
end

learner -> learner: call after_run hook

' --- Teardown ---
controller -> replay_buffer: close replay_buffer
controller -> learner: close learner
controller -> collector: close collector
controller -> evaluator: close evaluator
@enduml
|