Problems with speechbrain.pretrained
Hello, I can't install the library speechbrain.pretrained in Google Colab. Every time, the message says that this module was not found.
Is it possible that there have been changes recently?
Thanks
-Joan
Hi @Joan1949
Try
pip install speechbrain==0.5.16
Then restart the kernel. Alternatively, I suggest using a local IDE such as Visual Studio.
Hello again, I managed to do it using a new local IDE, specifically PyCharm.
Now my problem is that I can't load the model I want to retrain.
Here's my code:
import os
import torch
from torch import optim
from speechbrain.pretrained import ASRCNNTransducer
from torch.utils.data import DataLoader
from torchaudio import load
# Hyperparameter configuration
learning_rate = 1e-4
num_epochs = 10
batch_size = 8
model_checkpoint = "speechbrain/asr-crdnn-commonvoice-14-es"
# Folder that contains all the audio (.wav) and transcript (.txt) files.
# Raw string on purpose: in a normal literal the "\U" of "C:\Users" starts a
# \UXXXXXXXX unicode escape and the file fails to parse with a SyntaxError.
dataset_folder = r"C:\Users\jsgin\Desktop\compartida\dataset"
# Load the pretrained model
# NOTE(review): confirm that the installed speechbrain.pretrained actually
# exports ASRCNNTransducer; this CRDNN checkpoint is presumably meant to be
# loaded through EncoderDecoderASR -- verify against the model card.
print("Cargando el modelo preentrenado...")
asr_model = ASRCNNTransducer.from_hparams(source=model_checkpoint, savedir="pretrained_model")
print("Modelo cargado:", asr_model)
# Verify that asr_model was initialised correctly and, if so, fine-tune it
# on the (audio, transcript) pairs found in dataset_folder.


def load_data(folder):
    """Collect the audio paths and transcripts stored in *folder*.

    Args:
        folder: Directory holding ``<name>.wav`` recordings and
            ``<name>.txt`` transcripts side by side.

    Returns:
        An ``(audio_files, text_data)`` tuple: the list of ``.wav`` paths and
        a dict mapping each transcript's base name (file name without the
        extension) to its whitespace-stripped text.
    """
    audio_files = []
    text_data = {}
    # os.listdir() returns entries in arbitrary order; sorting keeps the
    # dataset order reproducible between runs and machines.
    for filename in sorted(os.listdir(folder)):
        path = os.path.join(folder, filename)
        if filename.endswith(".wav"):
            audio_files.append(path)
        elif filename.endswith(".txt"):
            with open(path, "r", encoding="utf-8") as handle:
                text_data[os.path.splitext(filename)[0]] = handle.read().strip()
    return audio_files, text_data


def custom_collate(batch):
    """Turn a batch of ``(audio_path, transcript)`` pairs into model inputs.

    Args:
        batch: Sequence of ``(audio_path, transcript)`` tuples.

    Returns:
        ``(audio_tensors, transcripts)``: a list with the first value returned
        by ``load`` for each path, and the tuple of transcripts in the same
        order.
    """
    audios, transcripts = zip(*batch)
    # The second value returned by load() is discarded.
    # NOTE(review): the waveforms are returned as a plain list, neither
    # padded nor stacked -- confirm that asr_model accepts this.
    audio_tensors = [load(audio_path)[0] for audio_path in audios]
    return audio_tensors, transcripts


if asr_model is not None:
    print("El modelo se inicializó correctamente.")
    optimizer = optim.Adam(asr_model.parameters(), lr=learning_rate)

    # Loss function
    # NOTE(review): presumes the tokenizer's pad id doubles as the CTC blank
    # index -- confirm against the tokenizer used by this checkpoint.
    criterion = torch.nn.CTCLoss(blank=asr_model.tokenizer.tokenizer.pad_id, reduction='mean')

    # Load the text and audio files
    audio_files, text_data = load_data(dataset_folder)

    # Pair each recording with the transcript sharing its base name. A .wav
    # without a matching .txt raises KeyError here.
    dataset = [
        (audio_file, text_data[os.path.splitext(os.path.basename(audio_file))[0]])
        for audio_file in audio_files
    ]

    # DataLoader
    dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=custom_collate)

    # Model training
    for epoch in range(num_epochs):
        asr_model.train()
        total_loss = 0.0
        for audio_batch, transcription_batch in dataloader:
            optimizer.zero_grad()
            # NOTE(review): CTCLoss expects log-probabilities; confirm what
            # calling asr_model on the batch actually returns.
            logits = asr_model(audio_batch)
            logits = logits.transpose(1, 0)  # Transpose the logits to match the shape CTCLoss expects
            # Every sequence is declared to span all logits.size(0) frames.
            input_lengths = torch.full(size=(logits.size(1),), fill_value=logits.size(0), dtype=torch.int)
            # NOTE(review): these are character counts of the raw transcripts,
            # while targets come from the tokenizer -- confirm both are
            # measured in the same unit.
            target_lengths = torch.tensor([len(transcription) for transcription in transcription_batch], dtype=torch.int)
            targets = asr_model.tokenizer.encode_batch(transcription_batch)
            loss = criterion(logits, targets, input_lengths, target_lengths)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # Reports the loss summed over the batches of this epoch.
        print(f"Epoch {epoch+1}, Loss: {total_loss}")

    # Save the trained model
    torch.save(asr_model.state_dict(), "trained_model.pth")
else:
    print("Error: El modelo no se inicializó correctamente.")
Thanks :)