Do not add speech detected in gaps to the prompt window
Browse files
- src/vad.py +8 -6
src/vad.py
CHANGED
@@ -151,6 +151,7 @@ class AbstractTranscription(ABC):
|
|
151 |
segment_start = segment['start']
|
152 |
segment_end = segment['end']
|
153 |
segment_expand_amount = segment.get('expand_amount', 0)
|
|
|
154 |
|
155 |
segment_duration = segment_end - segment_start
|
156 |
|
@@ -187,19 +188,20 @@ class AbstractTranscription(ABC):
|
|
187 |
languageCounter[segment_result['language']] += 1
|
188 |
|
189 |
# Update prompt window
|
190 |
-
self.__update_prompt_window(prompt_window, adjusted_segments, segment_end)
|
191 |
|
192 |
if len(languageCounter) > 0:
|
193 |
result['language'] = languageCounter.most_common(1)[0][0]
|
194 |
|
195 |
return result
|
196 |
|
197 |
-
def __update_prompt_window(self, prompt_window: Deque, adjusted_segments: List, segment_end: float):
|
198 |
if (self.max_prompt_window is not None and self.max_prompt_window > 0):
|
199 |
-
# Add segments to the current prompt window
|
200 |
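-
for segment in adjusted_segments:
|
201 |
-
if segment.get('no_speech_prob', 0) <= PROMPT_NO_SPEECH_PROB:
|
202 |
-
prompt_window.append(segment)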
|
|
|
203 |
|
204 |
while (len(prompt_window) > 0):
|
205 |
first_end_time = prompt_window[0].get('end', 0)
|
|
|
151 |
segment_start = segment['start']
|
152 |
segment_end = segment['end']
|
153 |
segment_expand_amount = segment.get('expand_amount', 0)
|
154 |
+
segment_gap = segment.get('gap', False)
|
155 |
|
156 |
segment_duration = segment_end - segment_start
|
157 |
|
|
|
188 |
languageCounter[segment_result['language']] += 1
|
189 |
|
190 |
# Update prompt window
|
191 |
+
self.__update_prompt_window(prompt_window, adjusted_segments, segment_end, segment_gap)
|
192 |
|
193 |
if len(languageCounter) > 0:
|
194 |
result['language'] = languageCounter.most_common(1)[0][0]
|
195 |
|
196 |
return result
|
197 |
|
198 |
+
def __update_prompt_window(self, prompt_window: Deque, adjusted_segments: List, segment_end: float, segment_gap: bool = False):
|
199 |
if (self.max_prompt_window is not None and self.max_prompt_window > 0):
|
200 |
+
# Add segments to the current prompt window (unless it is a speech gap)
|
201 |
+
if not segment_gap:
|
202 |
+
for segment in adjusted_segments:
|
203 |
+
if segment.get('no_speech_prob', 0) <= PROMPT_NO_SPEECH_PROB:
|
204 |
+
prompt_window.append(segment)
|
205 |
|
206 |
while (len(prompt_window) > 0):
|
207 |
first_end_time = prompt_window[0].get('end', 0)
|