speechbrain
/

slu-wav2vec2-ctc-MEDIA-full

Token Classification

hf-slu-leaderboard

Model card Files Files and versions Community

slu-wav2vec2-ctc-MEDIA-full / hyperparams.yaml

Adel-Moumen's picture

Update hyperparams.yaml

5ea4fed verified 9 months ago

history blame contribute delete

1.67 kB

	# ################################
	# Model: Wav2Vec + DNN + CTC + Softmax
	# Authors:
	# Gaelle Laperriere 2023
	# ################################

	# Source identifier for the wav2vec 2.0 encoder; referenced below as the
	# `source` argument of the `wav2vec2` object.
	# NOTE(review): presumably a Hugging Face Hub model id — confirm.
	wav2vec_url: LeBenchmark/wav2vec2-FR-3K-large

	# Feature parameters:
	# NOTE(review): `sample_rate` is not referenced by any other key visible in
	# this file; presumably read by the code that loads these hyperparameters.
	sample_rate: 16000
	# Referenced as the last entry of `input_shape` for the `enc` object.
	feats_dim: 1024

	# Model parameters:
	# `!name:` stores a reference to the callable instead of instantiating it;
	# the `enc` object picks it up through `!ref <activation>`.
	activation: !name:torch.nn.LeakyReLU
	dnn_blocks: 3
	dnn_neurons: 512
	# `dim` has to be nested under the `!new:` tag so that it is passed as a
	# constructor keyword argument instead of being parsed as an unrelated
	# top-level key.
	log_softmax: !new:torch.nn.LogSoftmax
	    dim: -1

	# Decoding parameters:
	# Referenced as the `blank_id` argument of `decoding_function`.
	blank_index: 0

	# Outputs:
	# Referenced as the `n_neurons` argument of `output_lin`.
	output_neurons: 212

	# ------ Functions and classes

	# wav2vec 2.0 encoder object. The four entries below are constructor keyword
	# arguments and therefore must be indented under the `!new:` tag; left at
	# top level they become stray keys and the class is built with no arguments.
	wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
	    source: !ref <wav2vec_url>
	    output_norm: True
	    freeze: True
	    save_path: wav2vec2_checkpoint

	# DNN that consumes the `feats_dim`-wide features. Its arguments must be
	# nested under the `!new:` tag: at top level, `activation`, `dnn_blocks` and
	# `dnn_neurons` would redefine the hyperparameters of the same name with
	# references to themselves.
	enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
	    input_shape: [null, null, !ref <feats_dim>]
	    activation: !ref <activation>
	    dnn_blocks: !ref <dnn_blocks>
	    dnn_neurons: !ref <dnn_neurons>

	# Linear layer mapping `dnn_neurons` inputs to `output_neurons` outputs.
	# The arguments are nested so they reach the constructor as keyword arguments.
	output_lin: !new:speechbrain.nnet.linear.Linear
	    input_size: !ref <dnn_neurons>
	    n_neurons: !ref <output_neurons>
	    bias: True

	# Groups `enc` and `output_lin` into one ModuleList; this is the object
	# listed as `model` in the pretrainer's loadables. A sequence nested under
	# `!new:` is the positional-argument list, so the single item here (itself
	# a list) is the first positional argument of ModuleList.
	model: !new:torch.nn.ModuleList
	    - [!ref <enc>, !ref <output_lin>]

	# Instantiated with no constructor arguments. It is listed under the
	# pretrainer's `loadables` — presumably its label mapping is loaded there;
	# confirm against the loading code.
	tokenizer: !new:speechbrain.dataio.encoder.CTCTextEncoder

	# Full pipeline container, listed in order:
	# wav2vec2 -> enc -> output_lin -> log_softmax.
	# The entries must be nested under the `!new:` tag so they become keyword
	# arguments of the container; at top level each one would redefine the
	# object of the same name as a reference to itself.
	encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
	    wav2vec2: !ref <wav2vec2>
	    enc: !ref <enc>
	    output_lin: !ref <output_lin>
	    log_softmax: !ref <log_softmax>

	# `!name:` yields the callable with `blank_id` pre-bound instead of calling
	# it here. `blank_id` must be nested under the tag for that binding to occur.
	decoding_function: !name:speechbrain.decoders.ctc_greedy_decode
	    blank_id: !ref <blank_index>

	# Mapping of module name -> object. The `encoder` entry must be nested;
	# otherwise `modules` is null and `encoder` is redefined at top level as a
	# reference to itself.
	modules:
	    encoder: !ref <encoder>

	# Pretrainer receiving a `loadables` mapping of name -> object. `loadables`
	# is a keyword argument of the constructor and its three entries form the
	# mapping, so both levels of nesting are required.
	pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
	    loadables:
	        wav2vec2: !ref <wav2vec2>
	        model: !ref <model>
	        tokenizer: !ref <tokenizer>