---
# Model and training configuration.
# Two top-level sections: `model` (with `transformer_config` and
# `first_stage_config`) and `train`.
#
# NOTE(review): the nesting of the sub-sections under `model` is inferred from
# key names and blank-line grouping; confirm it against the schema expected by
# the code that loads this file.
model:
  transformer_config:
    checkpoint_path: ./checkpoints/kny_video_full_gpt2_medium/checkpoint
    remaining_frames_method: "own_embeddings"
    transformer_type: "gpt2-medium"

  first_stage_config:
    checkpoint_path: ./checkpoints/kny_image_full_vgg19/checkpoint

    vqvae_config:
      beta: 0.25
      num_embeddings: 50257
      embedding_dim: 128

    autoencoder_config:
      z_channels: 512
      channels: 32
      channels_multiplier:
        - 2
        - 4
        - 8
        - 8
      num_res_blocks: 1
      attention_resolution:
        - 16
      resolution: 128
      dropout: 0.0

    discriminator_config:
      num_layers: 3
      filters: 64

    # NOTE(review): presumably belongs to the first-stage model alongside the
    # other *_config sections above — verify against the consumer.
    loss_config:
      discriminator:
        loss: "hinge"
        factor: 1.0
        iter_start: 16200
        weight: 0.3
      vqvae:
        codebook_weight: 1.0
        perceptual_weight: 4.0
        perceptual_loss: "vgg19"

train:
  batch_size: 64
  accumulation_size: 1
  n_epochs: 500
  len_x_train: 28213
  warmup_epoch_percentage: 0.15
  # Written with an explicit decimal point so that YAML 1.1 loaders
  # (e.g. PyYAML) resolve these as floats rather than as strings.
  lr_start: 5.0e-6
  lr_max: 1.0e-4
  perceptual_loss_weight: 1.0
  n_frames_before: 5
  stop_ground_truth_after_epoch: 200