AlexHung29629 committed on
Commit
2c3c2d2
1 Parent(s): f29a23c

Update ultravox_processing.py

Browse files
Files changed (1) hide show
  1. ultravox_processing.py +4 -1
ultravox_processing.py CHANGED
@@ -150,8 +150,11 @@ class UltravoxProcessor(transformers.ProcessorMixin):
150
  data["audio_len"] = x.attention_mask.sum(-1) - 1
151
  def cnn_out_len(in_len, kernel, stride=1, padding=1, dilation=1):
152
  return np.floor((in_len + (2*padding) - (dilation * (kernel - 1)) - 1)/stride + 1)
 
 
 
153
  nb_encoder_frames = [cnn_out_len(cnn_out_len(feat_len, kernel=3), kernel=3, stride=2) for feat_len in data["audio_len"]]
154
- data["audio_token_len"] = [int(x//self.stack_factor) for x in nb_encoder_frames]
155
 
156
  if text is not None:
157
  assert isinstance(
 
150
  data["audio_len"] = x.attention_mask.sum(-1) - 1
151
  def cnn_out_len(in_len, kernel, stride=1, padding=1, dilation=1):
152
  return np.floor((in_len + (2*padding) - (dilation * (kernel - 1)) - 1)/stride + 1)
153
+ def stack_frame_len(T):
154
+ T_pad = (T + self.stack_factor - 1) // self.stack_factor * self.stack_factor
155
+ return int((T_pad + self.stack_factor) // self.stack_factor)
156
  nb_encoder_frames = [cnn_out_len(cnn_out_len(feat_len, kernel=3), kernel=3, stride=2) for feat_len in data["audio_len"]]
157
+ data["audio_token_len"] = [stack_frame_len(x) for x in nb_encoder_frames]
158
 
159
  if text is not None:
160
  assert isinstance(