Spaces:

fishaudio
/

fish-speech-1

Running on A10G

App Files Files Community

fish-speech-1 / fish_speech /configs /text2semantic_pretrain.yaml

lengyue233

Init hf space integration

0a3525d verified 7 months ago

raw

history blame contribute delete

1.78 kB

	defaults:
	- base
	- model@model.model: dual_ar_2_codebook_small
	- _self_

	project: text2semantic_pretrain_dual_ar_debug
	max_length: 2048

	# Lightning Trainer
	trainer:
	accumulate_grad_batches: 1
	gradient_clip_val: 1.0
	gradient_clip_algorithm: 'norm'
	max_steps: 1_000_000
	precision: bf16-true
	limit_val_batches: 10

	# Dataset Configuration
	tokenizer:
	_target_: transformers.AutoTokenizer.from_pretrained
	pretrained_model_name_or_path: fishaudio/fish-speech-1

	# Dataset Configuration
	train_dataset:
	_target_: fish_speech.datasets.text.AutoAugTextDataset
	proto_files:
	- data/protos/train
	tokenizer: ${tokenizer}
	max_length: ${max_length}
	num_codebooks: ${model.model.config.num_codebooks}
	use_speaker: false
	interactive_prob: 0.5

	val_dataset:
	_target_: fish_speech.datasets.text.AutoAugTextDataset
	proto_files:
	- data/protos/test
	tokenizer: ${tokenizer}
	max_length: ${max_length}
	num_codebooks: ${model.model.config.num_codebooks}
	use_speaker: false
	interactive_prob: 0.5

	data:
	_target_: fish_speech.datasets.text.TextDataModule
	train_dataset: ${train_dataset}
	val_dataset: ${val_dataset}
	num_workers: 4
	batch_size: 8
	tokenizer: ${tokenizer}
	max_length: ${max_length}

	# Model Configuration
	model:
	_target_: fish_speech.models.text2semantic.TextToSemantic
	model: {}

	optimizer:
	_target_: torch.optim.AdamW
	_partial_: true
	lr: 3e-4
	weight_decay: 0.01
	betas: [0.9, 0.95]
	eps: 1e-5

	lr_scheduler:
	_target_: torch.optim.lr_scheduler.LambdaLR
	_partial_: true
	lr_lambda:
	_target_: fish_speech.scheduler.get_cosine_schedule_with_warmup_lr_lambda
	_partial_: true
	num_warmup_steps: 2000
	num_training_steps: ${trainer.max_steps}
	final_lr_ratio: 0.1