File size: 1,406 Bytes
3be620b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# Configuration for a two-stage generative video model: a transformer prior
# over a VQ-VAE/VQGAN first stage with an adversarial discriminator, plus the
# training schedule. Field meanings below are inferred from names and typical
# VQGAN configs — confirm against the code that consumes this file.
# NOTE(review): indentation appears to have been stripped from this copy of
# the file; the nesting (transformer_config/first_stage_config under model,
# sub-configs under first_stage_config) must be restored before any YAML
# parser will read the intended structure.
model:
transformer_config:
# Pretrained transformer weights; path is relative — presumably to the
# config file or working directory, verify against the loader.
checkpoint_path: ../../../checkpoints/kny_video_light/checkpoint
# GPT-2-style hyperparameters, currently disabled — values presumably come
# from the checkpoint or code defaults (TODO confirm).
# vocab_size: 50257
# n_positions: 1024
# n_embd: 1024 #1280 #768
# n_layer: 24 #36 #12
# n_head: 16 #20 #12
# resid_pdrop: 0.1
# embd_pdrop: 0.1
# attn_pdrop: 0.1
# Alternatives previously tried for encoding the remaining-frames signal:
# remaining_frames_method: "concat"
# remaining_frames_method: "token_type_ids"
remaining_frames_method: "own_embeddings"
first_stage_config:
# Pretrained first-stage (image VQGAN + discriminator) checkpoint.
checkpoint_path: ../../../checkpoints/kny_image_light_discriminator/checkpoint
vqvae_config:
# Commitment-loss coefficient (the beta of the VQ-VAE objective).
beta: 0.25
# Codebook size and dimensionality of each codebook vector.
num_embeddings: 64
embedding_dim: 256
autoencoder_config:
# Bottleneck (latent) channel count and base conv channel count.
z_channels: 128
channels: 64
# Per-level channel multipliers; 5 entries suggests 4 downsampling steps
# (128px input -> 8px latent grid) — TODO confirm in the encoder code.
channels_multiplier:
- 1
- 1
- 2
- 2
- 4
num_res_blocks: 1
# Feature-map resolution(s) at which attention blocks are inserted.
attention_resolution:
- 16
# Input image resolution (square), in pixels.
resolution: 128
dropout: 0.0
discriminator_config:
# Discriminator depth and base filter count (PatchGAN-style, presumably).
num_layers: 3
filters: 64
loss_config:
discriminator:
loss: "hinge"
factor: 1.0
# Global step at which the adversarial loss is switched on — TODO confirm
# whether this counts iterations or epochs.
iter_start: 16200
weight: 0.3
vqvae:
codebook_weight: 1.0
perceptual_weight: 4.0
perceptual_loss: "style"
train:
batch_size: 8
# Gradient-accumulation steps — effective batch presumably 8 * 8 = 64.
accumulation_size: 8
n_epochs: 2000
# Number of training samples; presumably used to derive steps per epoch.
len_x_train: 631
# Fraction of total training spent warming up the learning rate.
warmup_epoch_percentage: 0.15
# NOTE(review): YAML 1.1 loaders (e.g. PyYAML) parse `1e-5` as the STRING
# "1e-5" because the mantissa has no dot, while `2.5e-4` below parses as a
# float. Confirm the consumer coerces it, or write it as 1.0e-5.
lr_start: 1e-5
lr_max: 2.5e-4
perceptual_loss_weight: 1.0
# Number of conditioning frames preceding the frame being predicted.
n_frames_before: 5
# Epoch after which ground-truth frames stop being fed (teacher forcing
# cutoff, presumably) — TODO confirm against the training loop.
stop_ground_truth_after_epoch: 100
|