# File size: 2,632 Bytes
# Revision id shown by the file viewer: 05c9ac2
# ML-Agents trainer configuration for the `Sorter` behavior, trained with PPO,
# plus a ten-lesson curriculum over the `num_tiles` environment parameter.
# NOTE(review): nesting was reconstructed from the ML-Agents config schema
# because the original text had lost its indentation; confirm against the
# upstream file before use.
behaviors:
  Sorter:
    trainer_type: ppo
    hyperparameters:
      batch_size: 512
      # 80 x batch_size.
      buffer_size: 40960
      learning_rate: 0.0003
      beta: 0.005
      epsilon: 0.2
      lambd: 0.95
      num_epoch: 3
      learning_rate_schedule: constant
    network_settings:
      normalize: false
      hidden_units: 128
      num_layers: 2
      vis_encode_type: simple
    reward_signals:
      extrinsic:
        gamma: 0.99
        strength: 1.0
    keep_checkpoints: 5
    max_steps: 5000000
    time_horizon: 256
    summary_freq: 10000

environment_parameters:
  num_tiles:
    # Lessons are listed in order. `value` rises from 2.0 to 20.0 in steps of
    # 2.0 while the completion `threshold` rises from 0.3 to 0.75. Every
    # lesson's criteria are measured on the `Sorter` behavior.
    curriculum:
      - name: Lesson0  # The '-' is important as this is a list
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.3
        value: 2.0
      - name: Lesson1
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.4
        value: 4.0
      - name: Lesson2
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.45
        value: 6.0
      - name: Lesson3
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.5
        value: 8.0
      - name: Lesson4
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.55
        value: 10.0
      - name: Lesson5
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.6
        value: 12.0
      - name: Lesson6
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.65
        value: 14.0
      - name: Lesson7
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.7
        value: 16.0
      - name: Lesson8
        completion_criteria:
          measure: progress
          behavior: Sorter
          signal_smoothing: true
          min_lesson_length: 100
          threshold: 0.75
        value: 18.0
      # Final lesson: no completion_criteria, so there is nothing to advance to.
      - name: Lesson9
        value: 20.0
|