# Viewer metadata captured with this copy: file size 1,137 bytes,
# id 3be620b (looks like a short commit hash).
# Model section: transformer settings plus first-stage (VQ-VAE, autoencoder,
# discriminator, loss) settings.
# NOTE(review): this nesting was rebuilt from a copy whose indentation had
# been stripped. Confirm it against the code that loads this file, in
# particular that loss_config sits under first_stage_config rather than
# directly under model.
model:
  transformer_config:
    # Relative path; presumably resolved against the working directory of
    # the process that loads it — confirm.
    checkpoint_path: ./checkpoints/kny_video_full_gpt2_medium/checkpoint
    remaining_frames_method: "own_embeddings"
    transformer_type: "gpt2-medium"
  first_stage_config:
    # A different checkpoint from transformer_config.checkpoint_path; the two
    # keys must stay in separate mappings or one silently overrides the other.
    checkpoint_path: ./checkpoints/kny_image_full_vgg19/checkpoint
    vqvae_config:
      beta: 0.25
      # NOTE(review): 50257 equals the GPT-2 vocabulary size; presumably
      # chosen so codebook indices line up with the transformer's token ids
      # — confirm before changing.
      num_embeddings: 50257
      embedding_dim: 128
    autoencoder_config:
      z_channels: 512
      channels: 32
      channels_multiplier:
        - 2
        - 4
        - 8
        - 8
      num_res_blocks: 1
      attention_resolution:
        - 16
      resolution: 128
      dropout: 0.0
    discriminator_config:
      num_layers: 3
      filters: 64
    loss_config:
      discriminator:
        loss: "hinge"
        factor: 1.0
        # NOTE(review): presumably the training iteration at which the
        # discriminator term is switched on — confirm against the loss code.
        iter_start: 16200
        weight: 0.3
      vqvae:
        codebook_weight: 1.0
        perceptual_weight: 4.0
        perceptual_loss: "vgg19"
# Training hyperparameters.
# NOTE(review): the nesting under `train` was rebuilt from a copy whose
# indentation had been stripped — confirm against the code that loads it.
train:
  batch_size: 64
  accumulation_size: 1
  n_epochs: 500
  # NOTE(review): presumably the number of training samples, hard-coded here
  # rather than derived from the dataset — confirm it matches the data used.
  len_x_train: 28213
  warmup_epoch_percentage: 0.15
  # Written with an explicit decimal point so that YAML 1.1 loaders (for
  # example PyYAML) resolve them as floats; the bare forms 5e-6 and 1e-4
  # are loaded as strings by such loaders. The numeric values are unchanged.
  lr_start: 5.0e-6
  lr_max: 1.0e-4
  perceptual_loss_weight: 1.0
  n_frames_before: 5
  stop_ground_truth_after_epoch: 200