Spaces:
Build error
Build error
pustozerov
committed on
Commit
•
516b73f
1
Parent(s):
8f18caf
Separated paths to user and DB data.
Browse files
app.py
CHANGED
@@ -10,7 +10,8 @@ from scipy.io.wavfile import write
|
|
10 |
|
11 |
from modules.diarization.nemo_diarization import diarization
|
12 |
|
13 |
-
|
|
|
14 |
SAMPLE_RATE = 16000
|
15 |
dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
|
16 |
|
@@ -19,17 +20,17 @@ st.subheader('This simple demo shows the possibilities of the ASR and NLP in the
|
|
19 |
'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
|
20 |
'pickup a set of images from the built-in database or try uploading your own files.')
|
21 |
if st.button('Try a random sample from the database'):
|
22 |
-
|
23 |
shuffled_dataset = dataset.shuffle(seed=random.randint(0, 100))
|
24 |
file_name = str(shuffled_dataset["file"][0]).split(".")[0]
|
25 |
audio_bytes = np.array(shuffled_dataset["data"][0])
|
26 |
audio_bytes_scaled = np.int16(audio_bytes / np.max(np.abs(audio_bytes)) * 32767)
|
27 |
-
write(os.path.join(
|
28 |
-
f = sf.SoundFile(os.path.join(
|
29 |
-
audio_file = open(os.path.join(
|
30 |
st.audio(audio_file.read())
|
31 |
st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
|
32 |
-
result = diarization(os.path.join(
|
33 |
with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
|
34 |
transcript = f.read()
|
35 |
st.write("Transcription completed.")
|
@@ -46,11 +47,10 @@ if st.button('Try a random sample from the database'):
|
|
46 |
uploaded_file = st.file_uploader("Choose your recording with a speech",
|
47 |
accept_multiple_files=False, type=["mp3", "wav", "ogg"])
|
48 |
if uploaded_file is not None:
|
49 |
-
|
50 |
-
|
51 |
-
for f in glob.glob(folder + '*'):
|
52 |
os.remove(f)
|
53 |
-
save_path =
|
54 |
if ".mp3" in uploaded_file:
|
55 |
sound = AudioSegment.from_mp3(uploaded_file)
|
56 |
elif ".ogg" in uploaded_file:
|
|
|
10 |
|
11 |
from modules.diarization.nemo_diarization import diarization
|
12 |
|
13 |
+
FOLDER_WAV_DB = "data/database/"
|
14 |
+
FOLDER_WAV_USER = "data/user_data/"
|
15 |
SAMPLE_RATE = 16000
|
16 |
dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
|
17 |
|
|
|
20 |
'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
|
21 |
'pickup a set of images from the built-in database or try uploading your own files.')
|
22 |
if st.button('Try a random sample from the database'):
|
23 |
+
os.makedirs(FOLDER_WAV_DB, exist_ok=True)
|
24 |
shuffled_dataset = dataset.shuffle(seed=random.randint(0, 100))
|
25 |
file_name = str(shuffled_dataset["file"][0]).split(".")[0]
|
26 |
audio_bytes = np.array(shuffled_dataset["data"][0])
|
27 |
audio_bytes_scaled = np.int16(audio_bytes / np.max(np.abs(audio_bytes)) * 32767)
|
28 |
+
write(os.path.join(FOLDER_WAV_DB, file_name + '.wav'), rate=SAMPLE_RATE, data=audio_bytes_scaled)
|
29 |
+
f = sf.SoundFile(os.path.join(FOLDER_WAV_DB, file_name + '.wav'))
|
30 |
+
audio_file = open(os.path.join(FOLDER_WAV_DB, file_name + '.wav'), 'rb')
|
31 |
st.audio(audio_file.read())
|
32 |
st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
|
33 |
+
result = diarization(os.path.join(FOLDER_WAV_DB, file_name + '.wav'))
|
34 |
with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
|
35 |
transcript = f.read()
|
36 |
st.write("Transcription completed.")
|
|
|
47 |
uploaded_file = st.file_uploader("Choose your recording with a speech",
|
48 |
accept_multiple_files=False, type=["mp3", "wav", "ogg"])
|
49 |
if uploaded_file is not None:
|
50 |
+
os.makedirs(FOLDER_WAV_USER, exist_ok=True)
|
51 |
+
for f in glob.glob(FOLDER_WAV_USER + '*'):
|
|
|
52 |
os.remove(f)
|
53 |
+
save_path = FOLDER_WAV_USER + uploaded_file.name
|
54 |
if ".mp3" in uploaded_file:
|
55 |
sound = AudioSegment.from_mp3(uploaded_file)
|
56 |
elif ".ogg" in uploaded_file:
|