{ "_name_or_path": "ArthurZ/jukebox-5b-lyrics", "activation_function": "quick_gelu", "alignment_head": [ 2, null, null ], "alignment_layer": [ 68, null, null ], "architectures": [ "JukeboxModel" ], "attn_dropout": 0.0, "attn_init_scale": 1.0, "attn_order": [ 10, 2, 2 ], "blocks": 128, "bos_token_id": 50256, "c_res": 1, "cond_c_res": [ 0, 1, 1 ], "cond_depth": [ 3, 16, 16 ], "cond_dilation_cycle": [ null, 8, 8 ], "cond_dilation_growth_rate": [ 1, 3, 3 ], "cond_m_conv": 1, "cond_res_scale": false, "cond_width": [ 128, 1024, 1024 ], "cond_zero_out": false, "copy_input": false, "depth": [ 79, 72, 72 ], "downs_t": [ 3, 2, 2 ], "emb_dropout": 0.0, "eos_token_id": 50256, "fp16_params": true, "hop_fraction": [ 0.125, 0.5, 0.5 ], "hop_length": 256, "init_scale": [ 0.1, 1, 1 ], "initializer_range": 0.02, "l_bins": 2048, "labels": true, "layer_norm_epsilon": 1e-05, "m_attn": 0.25, "max_bow_genre_size": 5, "max_duration": 600.0, "merged_decoder": [ true, false, false ], "min_duration": 23.8, "mlp_init_scale": 0.02, "model_type": "jukebox", "multispec_loss_hop_length": [ 240, 120, 50 ], "multispec_loss_n_fft": [ 2048, 1024, 512 ], "multispec_loss_window_size": [ 1200, 600, 240 ], "multispectral": 1.0, "n_ctx": [ 8192, 8192, 8192 ], "n_embd": 768, "n_head": 12, "n_heads": [ 8, 1, 1 ], "n_inner": null, "n_layer": 12, "n_positions": 1024, "n_samples": 1, "n_tokens": [ 512, 0, 0 ], "n_vocab": 80, "name": "AudioSamples", "nb_priors": 3, "pos_init": false, "prime_attn_dropout": 0.0, "prime_attn_order": [ 2, 0, 0 ], "prime_blocks": 32, "prime_c_res": 1, "prime_cond_c_res": [ 0, 1, 1 ], "prime_depth": [ 18, 3, 3 ], "prime_emb_dropout": 0.0, "prime_heads": 4, "prime_init_scale": [ 0.1, 0.4, 0.4 ], "prime_loss_fraction": [ 0.4, 0.0, 0.0 ], "prime_m_attn": 0.25, "prime_m_mlp": 1.0, "prime_pos_init": false, "prime_res_scale": false, "prime_resid_dropout": 0.0, "prime_spread": null, "prime_width": [ 1280, 128, 128 ], "prime_zero_out": false, "priors_width": [ 4096, 2048, 1024 ], "reorder_and_upcast_attn": false, "res_scale": false, "resid_dropout": 0.0, "sample_hop_length": 30000, "sample_length": 1058304, "sample_length_in_seconds": 24, "scale_attn_by_inverse_layer_idx": false, "scale_attn_weights": true, "single_enc_dec": [ false, false, false ], "spectral": 0.0, "spread": null, "sr": 44100, "strides_t": [ 2, 2, 2 ], "summary_activation": null, "summary_first_dropout": 0.1, "summary_proj_to_labels": true, "summary_type": "cls_index", "summary_use_proj": true, "t_bins": 128, "torch_dtype": "float32", "total_sample_length_in_seconds": 180, "transformers_version": "4.21.0.dev0", "use_cache": true, "use_nonrelative_specloss": true, "use_tokens": [ true, false, false ], "vocab_size": 50257, "vq_vae_codebook_dimension": 2048, "vq_vae_commit": 0.02, "vq_vae_conv_block_depth": 4, "vq_vae_conv_block_width": 32, "vq_vae_depth": 4, "vq_vae_dilation_cycle": null, "vq_vae_dilation_growth_rate": 3, "vq_vae_downs_t": [ 3, 2, 2 ], "vq_vae_emmbedding_width": 64, "vq_vae_levels": 3, "vq_vae_lmu": 0.99, "vq_vae_m_conv": 1, "vq_vae_multipliers": [ 2, 1, 1 ], "vq_vae_reverse_decoder_dilation": 1, "vq_vae_strides_t": [ 2, 2, 2 ], "vq_vae_width": 64, "vqvae_z_shapes": [ [ 2067 ], [ 8268 ], [ 33072 ] ], "width": [ 4800, 1920, 1920 ], "y_bins": [ [ 120, 4111 ], [ 120, 4111 ], [ 120, 4111 ] ], "zero_out": false }