Spaces:
Running
Running
ashhadahsan
committed on
Commit
•
5e3e8ef
1
Parent(s):
2f1bcc3
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,8 @@ from utils import translate_to_english, detect_language, write, read, get_key
|
|
6 |
import whisperx as whisper
|
7 |
import json
|
8 |
import pandas as pd
|
|
|
|
|
9 |
|
10 |
if "btn1" not in st.session_state:
|
11 |
st.session_state["btn1"] = False
|
@@ -135,22 +137,37 @@ with input:
|
|
135 |
)
|
136 |
else:
|
137 |
temperature = [temperature]
|
|
|
|
|
|
|
|
|
|
|
138 |
# st.write(temperature)
|
139 |
submit = st.button("Submit", type="primary")
|
140 |
with output:
|
141 |
st.header("Output")
|
|
|
|
|
|
|
142 |
if submit:
|
143 |
if audio_uploaded is None:
|
144 |
# st.audio(audio_bytes, format="audio/wav")
|
145 |
audio_uploaded = audio_file
|
146 |
if audio_uploaded is not None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
|
148 |
# audio_bytes = audio_uploaded.read()
|
149 |
# st.audio(audio_bytes, format="audio/wav")
|
150 |
if language == "":
|
151 |
model = whisper.load_model(model_name)
|
152 |
with st.spinner("Detecting language..."):
|
153 |
-
detection = detect_language(
|
154 |
language = detection.get("detected_language")
|
155 |
del model
|
156 |
# st.write(language)
|
@@ -169,7 +186,7 @@ with output:
|
|
169 |
with st.container():
|
170 |
with st.spinner(f"Running with {model_name} model"):
|
171 |
result = model.transcribe(
|
172 |
-
|
173 |
language=language,
|
174 |
patience=patience,
|
175 |
initial_prompt=initial_prompt,
|
@@ -193,7 +210,7 @@ with output:
|
|
193 |
result["segments"],
|
194 |
model_a,
|
195 |
metadata,
|
196 |
-
|
197 |
device=device,
|
198 |
)
|
199 |
|
@@ -212,11 +229,11 @@ with output:
|
|
212 |
if text_json is None:
|
213 |
words_segments = result_aligned["word_segments"]
|
214 |
write(
|
215 |
-
|
216 |
dtype=transcription,
|
217 |
result_aligned=result_aligned,
|
218 |
)
|
219 |
-
trans_text = read(
|
220 |
trans.text_area(
|
221 |
"transcription", trans_text, height=None, max_chars=None, key=None
|
222 |
)
|
@@ -248,3 +265,4 @@ with output:
|
|
248 |
lang.text_input(
|
249 |
"detected language", language_dict.get(language), disabled=True
|
250 |
)
|
|
|
|
6 |
import whisperx as whisper
|
7 |
import json
|
8 |
import pandas as pd
|
9 |
+
from pydub import AudioSegment
|
10 |
+
import os
|
11 |
|
12 |
if "btn1" not in st.session_state:
|
13 |
st.session_state["btn1"] = False
|
|
|
137 |
)
|
138 |
else:
|
139 |
temperature = [temperature]
|
140 |
+
try:
|
141 |
+
if len(temperature) == 0:
|
142 |
+
st.error("Choose correct value for temperature")
|
143 |
+
except:
|
144 |
+
pass
|
145 |
# st.write(temperature)
|
146 |
submit = st.button("Submit", type="primary")
|
147 |
with output:
|
148 |
st.header("Output")
|
149 |
+
import uuid
|
150 |
+
|
151 |
+
name = str(uuid.uuid1())
|
152 |
if submit:
|
153 |
if audio_uploaded is None:
|
154 |
# st.audio(audio_bytes, format="audio/wav")
|
155 |
audio_uploaded = audio_file
|
156 |
if audio_uploaded is not None:
|
157 |
+
if audio_uploaded.name.endswith(".wav"):
|
158 |
+
temp = AudioSegment.from_wav(audio_uploaded)
|
159 |
+
temp.export(f"{name}.wav")
|
160 |
+
|
161 |
+
if audio_uploaded.name.endswith(".mp3"):
|
162 |
+
temp = AudioSegment.from_wav(audio_uploaded)
|
163 |
+
temp.export(f"{name}.wav")
|
164 |
|
165 |
# audio_bytes = audio_uploaded.read()
|
166 |
# st.audio(audio_bytes, format="audio/wav")
|
167 |
if language == "":
|
168 |
model = whisper.load_model(model_name)
|
169 |
with st.spinner("Detecting language..."):
|
170 |
+
detection = detect_language(f"{name}.wav", model)
|
171 |
language = detection.get("detected_language")
|
172 |
del model
|
173 |
# st.write(language)
|
|
|
186 |
with st.container():
|
187 |
with st.spinner(f"Running with {model_name} model"):
|
188 |
result = model.transcribe(
|
189 |
+
f"{name}.wav",
|
190 |
language=language,
|
191 |
patience=patience,
|
192 |
initial_prompt=initial_prompt,
|
|
|
210 |
result["segments"],
|
211 |
model_a,
|
212 |
metadata,
|
213 |
+
f"{name}.wav",
|
214 |
device=device,
|
215 |
)
|
216 |
|
|
|
229 |
if text_json is None:
|
230 |
words_segments = result_aligned["word_segments"]
|
231 |
write(
|
232 |
+
f"{name}.wav",
|
233 |
dtype=transcription,
|
234 |
result_aligned=result_aligned,
|
235 |
)
|
236 |
+
trans_text = read(f"{name}.wav", transcription)
|
237 |
trans.text_area(
|
238 |
"transcription", trans_text, height=None, max_chars=None, key=None
|
239 |
)
|
|
|
265 |
lang.text_input(
|
266 |
"detected language", language_dict.get(language), disabled=True
|
267 |
)
|
268 |
+
os.remove(f"{name}.wav")
|