AlexHung29629 committed on
Commit
ae4dcca
1 Parent(s): 424f6bf

Update ultravox_processing.py

Browse files
Files changed (1) hide show
  1. ultravox_processing.py +2 -2
ultravox_processing.py CHANGED
@@ -150,10 +150,10 @@ class UltravoxProcessor(transformers.ProcessorMixin):
150
  def cnn_out_len(in_len, kernel, stride=1, padding=1, dilation=1):
151
  return np.floor((in_len + (2*padding) - (dilation * (kernel - 1)) - 1)/stride + 1)
152
  def stack_frame_len(T):
153
- T_pad = (T + self.stack_factor - 1) // self.stack_factor * self.stack_factor
154
  return int((T_pad + self.stack_factor) // self.stack_factor)
155
  nb_encoder_frames = [cnn_out_len(cnn_out_len(feat_len, kernel=3), kernel=3, stride=2) for feat_len in data["audio_len"]]
156
- data["audio_token_len"] = [stack_frame_len(x) for x in nb_encoder_frames]
157
 
158
  if text is not None:
159
  assert isinstance(
 
150
  def cnn_out_len(in_len, kernel, stride=1, padding=1, dilation=1):
151
  return np.floor((in_len + (2*padding) - (dilation * (kernel - 1)) - 1)/stride + 1)
152
  def stack_frame_len(T):
153
+ T_pad = ((T + self.stack_factor - 1) // self.stack_factor) * self.stack_factor
154
  return int((T_pad + self.stack_factor) // self.stack_factor)
155
  nb_encoder_frames = [cnn_out_len(cnn_out_len(feat_len, kernel=3), kernel=3, stride=2) for feat_len in data["audio_len"]]
156
+ data["audio_token_len"] = [stack_frame_len(nf) for nf in nb_encoder_frames]
157
 
158
  if text is not None:
159
  assert isinstance(