ValueError: negative dimensions are not allowed
Hey @ylacombe, I've been trying to fine-tune w2v-BERT 2.0 on some of my own custom training data. However, when I run the prepare_dataset function, I get the following error: ValueError: negative dimensions are not allowed
It originates from:
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/audio_utils.py", line 532, in spectrogram
spectrogram = np.empty((num_frames, num_frequency_bins), dtype=np.complex64)
I have tried fine-tuning other models such as MMS and Whisper, but the issue only occurs with w2v-BERT 2.0.
For some additional context, I am using the latest releases of transformers, datasets, torchaudio and torch.
The following is the stack trace of the crash:
preprocess datasets (num_proc=32): 9%|███ | 6791/71915 [00:27<04:24, 245.95 examples/s]
multiprocess.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/home/user/anaconda3/lib/python3.11/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 634, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3517, in _map_single
example = apply_function_on_filtered_inputs(example, i, offset=offset)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3416, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/media/user/drive_2/maithili_asr/w2vbert2_train.py", line 669, in prepare_dataset
batch["input_features"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_features[0]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py", line 99, in __call__
inputs = self.feature_extractor(audio, sampling_rate=sampling_rate, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 259, in __call__
features = [self._extract_fbank_features(waveform) for waveform in raw_speech]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 259, in <listcomp>
features = [self._extract_fbank_features(waveform) for waveform in raw_speech]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 128, in _extract_fbank_features
features = spectrogram(
^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/audio_utils.py", line 532, in spectrogram
spectrogram = np.empty((num_frames, num_frequency_bins), dtype=np.complex64)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: negative dimensions are not allowed
"""
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/media/user/drive_2/maithili_asr/w2vbert2_train.py", line 807, in <module>
main()
File "/media/user/drive_2/maithili_asr/w2vbert2_train.py", line 676, in main
vectorized_datasets = raw_datasets.map(
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/dataset_dict.py", line 869, in map
{
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/dataset_dict.py", line 870, in <dictcomp>
k: dataset.map(
^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 602, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 567, in wrapper
out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3248, in map
for rank, done, content in iflatmap_unordered(
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 674, in iflatmap_unordered
[async_result.get(timeout=0.05) for async_result in async_results]
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 674, in <listcomp>
[async_result.get(timeout=0.05) for async_result in async_results]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/multiprocess/pool.py", line 774, in get
raise self._value
File "/home/user/anaconda3/lib/python3.11/site-packages/multiprocess/pool.py", line 125, in worker
result = (True, func(*args, **kwds))
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/utils/py_utils.py", line 634, in _write_generator_to_queue
for i, result in enumerate(func(**kwargs)):
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3517, in _map_single
example = apply_function_on_filtered_inputs(example, i, offset=offset)
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/datasets/arrow_dataset.py", line 3416, in apply_function_on_filtered_inputs
processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
^^^^^^^^^^^^^^^^^
File "/media/user/drive_2/maithili_asr/w2vbert2_train.py", line 669, in prepare_dataset
batch["input_features"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_features[0]
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py", line 99, in __call__
inputs = self.feature_extractor(audio, sampling_rate=sampling_rate, **kwargs)
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 259, in __call__
features = [self._extract_fbank_features(waveform) for waveform in raw_speech]
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 259, in <listcomp>
features = [self._extract_fbank_features(waveform) for waveform in raw_speech]
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py", line 128, in _extract_fbank_features
features = spectrogram(
^^^^^^^^^^^^^^^^^
File "/home/user/anaconda3/lib/python3.11/site-packages/transformers/audio_utils.py", line 532, in spectrogram
spectrogram = np.empty((num_frames, num_frequency_bins), dtype=np.complex64)
^^^^^^^^^^^^^^^^^
ValueError: negative dimensions are not allowed
The following is what my prepare_dataset function looks like:
def prepare_dataset(batch):
audio = batch["audio"]
batch["input_features"] = processor(audio["array"], sampling_rate=audio["sampling_rate"]).input_features[0]
batch["input_length"] = len(batch["input_features"])
batch["labels"] = processor(text=batch["target_text"]).input_ids
return batch
with training_args.main_process_first(desc="dataset map preprocessing"):
vectorized_datasets = raw_datasets.map(
prepare_dataset,
remove_columns=next(iter(raw_datasets.values())).column_names,
num_proc=num_workers,
desc="preprocess datasets",
)
@ylacombe
I tried altering the code in the audio_utils.py file and added two lines that convert num_frames and num_frequency_bins to non-negative int values, to avoid the negative dimension:
num_frames = 1 + np.floor((waveform.size - frame_length) / hop_length)
num_frames = int(max(num_frames, 0)) # Ensure num_frames is non-negative
# Compute num_frequency_bins
num_frequency_bins = (fft_length // 2) + 1 if onesided else fft_length
num_frequency_bins = max(num_frequency_bins, 0) # Ensure num_frequency_bins is non-negative
# Create the spectrogram array
spectrogram = np.empty((num_frames, num_frequency_bins), dtype=np.complex64)
However, this ended up exhausting the GPU VRAM and causing OOM errors, as the GPU was trying to allocate 1140 GB of VRAM.
Gently pinging @patrickvonplaten: hey Patrick, could you please let me know what the issue could be here?
@ylacombe, sorry for being a bit pushy, but what could be the issue here? Even after several tries, I am unable to fix it.