File size: 1,882 Bytes
079c32c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@startuml serial_main
' Sequence diagram of the serial pipeline entry point: component setup,
' optional random warm-up collection, the evaluate/collect/train loop,
' and final teardown.
header Serial Pipeline
title Serial Main

' Participants, declared in the left-to-right order they are rendered.
participant controller
participant env_manager
participant policy
participant learner
participant replay_buffer
participant collector
participant evaluator
participant commander
' Prefix every message with a running sequence number.
autonumber

' --- Setup: the controller initialises every component ---
controller -> env_manager: init collector and evaluator env_manager; set seed
controller -> policy: init policy
controller -> learner: init learner; set learn_mode policy
controller -> collector: init collector; set collect_mode policy; set env_manager
controller -> evaluator: init evaluator; set eval_mode policy; set env_manager
controller -> commander: init commander; set command_mode policy
controller -> replay_buffer: init replay_buffer

' --- Optional warm-up: push random-policy data before training ---
' Single-branch condition, so `opt` is used instead of `alt`.
opt random collect before training starts
  collector -> collector: reset policy to random one; generate random data
  collector -> replay_buffer: push_data
  collector -> collector: reset policy back to the original one
end

' --- Main loop ---
learner -> learner: call before_run hook
loop
  commander -> commander: step
  ' Periodic evaluation; may terminate the run when stop_value is reached.
  opt this iteration needs evaluation
    evaluator -> evaluator: eval_performance
    ' Two mutually exclusive outcomes, hence a real alt/else.
    alt reach eval stop_value
      learner -> learner: save checkpoint and exit
    else episode_return is new highest
      learner -> learner: save checkpoint
    end
  end
  ' Collect fresh data and store it.
  collector -> collector: generate data (steps or episodes)
  collector -> replay_buffer: push_data
  ' Train on sampled data for a fixed number of inner iterations.
  loop learner_train_iteration times
    replay_buffer -> learner: sample_data
    learner -> learner: train
    opt replay_buffer uses prioritization
      learner -> replay_buffer: update with priority_info
    end
  end
  ' The buffer is cleared after each round of training when on-policy.
  opt on_policy training
    replay_buffer -> replay_buffer: clear
  end
end
learner -> learner: call after_run hook

' --- Teardown: the controller closes the components ---
controller -> replay_buffer: close replay_buffer
controller -> learner: close learner
controller -> collector: close collector
controller -> evaluator: close evaluator
@enduml