alex-ht
committed on
Commit
•
7b01696
1
Parent(s):
f767f82
update
Browse files
- ultravox_processing.py +2 -1
ultravox_processing.py
CHANGED
@@ -171,6 +171,7 @@ class UltravoxProcessor(transformers.ProcessorMixin):
|
|
171 |
text, list
|
172 |
), "Text must be a list."
|
173 |
processed_text = []
|
|
|
174 |
for t in text:
|
175 |
if self.audio_placeholder in t:
|
176 |
if "audio_token_len" not in data:
|
@@ -184,7 +185,7 @@ class UltravoxProcessor(transformers.ProcessorMixin):
|
|
184 |
add_special_tokens=False,
|
185 |
)
|
186 |
)
|
187 |
-
data["audio_token_start_idx"]
|
188 |
|
189 |
# Replace the audio placeholder with the audio token.
|
190 |
# e.g. "Transcribe\n<|audio|>" -> "Transcribe </s></s></s></s></s></s></s></s>"
|
|
|
171 |
text, list
|
172 |
), "Text must be a list."
|
173 |
processed_text = []
|
174 |
+
data["audio_token_start_idx"] = []
|
175 |
for t in text:
|
176 |
if self.audio_placeholder in t:
|
177 |
if "audio_token_len" not in data:
|
|
|
185 |
add_special_tokens=False,
|
186 |
)
|
187 |
)
|
188 |
+
data["audio_token_start_idx"].append(start_idx)
|
189 |
|
190 |
# Replace the audio placeholder with the audio token.
|
191 |
# e.g. "Transcribe\n<|audio|>" -> "Transcribe </s></s></s></s></s></s></s></s>"
|