Spaces:

pustozerov
/

poc_call_transcription

Build error

App Files Community

pustozerov committed on Aug 16, 2022

Commit

516b73f

•

1 Parent(s): 8f18caf

Separated paths to user and DB data.

Browse files

Files changed (1) hide show

app.py +10 -10

app.py CHANGED Viewed

@@ -10,7 +10,8 @@ from scipy.io.wavfile import write
 from modules.diarization.nemo_diarization import diarization
-FOLDER_WAV = "data/user_data"
 SAMPLE_RATE = 16000
 dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
@@ -19,17 +20,17 @@ st.subheader('This simple demo shows the possibilities of the ASR and NLP in the
              'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
              'pickup a set of images from the built-in database or try uploading your own files.')
 if st.button('Try a random sample from the database'):
     shuffled_dataset = dataset.shuffle(seed=random.randint(0, 100))
     file_name = str(shuffled_dataset["file"][0]).split(".")[0]
     audio_bytes = np.array(shuffled_dataset["data"][0])
     audio_bytes_scaled = np.int16(audio_bytes / np.max(np.abs(audio_bytes)) * 32767)
-    write(os.path.join(FOLDER_WAV, file_name + '.wav'), rate=SAMPLE_RATE, data=audio_bytes_scaled)
-    f = sf.SoundFile(os.path.join(FOLDER_WAV, file_name + '.wav'))
-    audio_file = open(os.path.join(FOLDER_WAV, file_name + '.wav'), 'rb')
     st.audio(audio_file.read())
     st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
-    result = diarization(os.path.join(FOLDER_WAV, file_name + '.wav'))
     with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
         transcript = f.read()
     st.write("Transcription completed.")
@@ -46,11 +47,10 @@ if st.button('Try a random sample from the database'):
 uploaded_file = st.file_uploader("Choose your recording with a speech",
                                  accept_multiple_files=False, type=["mp3", "wav", "ogg"])
 if uploaded_file is not None:
-    folder = "data/user_data/"
-    os.makedirs(folder, exist_ok=True)
-    for f in glob.glob(folder + '*'):
         os.remove(f)
-    save_path = folder + uploaded_file.name
     if ".mp3" in uploaded_file:
         sound = AudioSegment.from_mp3(uploaded_file)
     elif ".ogg" in uploaded_file:

 from modules.diarization.nemo_diarization import diarization
+FOLDER_WAV_DB = "data/database/"
+FOLDER_WAV_USER = "data/user_data/"
 SAMPLE_RATE = 16000
 dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
              'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
              'pickup a set of images from the built-in database or try uploading your own files.')
 if st.button('Try a random sample from the database'):
+    os.makedirs(FOLDER_WAV_DB, exist_ok=True)
     shuffled_dataset = dataset.shuffle(seed=random.randint(0, 100))
     file_name = str(shuffled_dataset["file"][0]).split(".")[0]
     audio_bytes = np.array(shuffled_dataset["data"][0])
     audio_bytes_scaled = np.int16(audio_bytes / np.max(np.abs(audio_bytes)) * 32767)
+    write(os.path.join(FOLDER_WAV_DB, file_name + '.wav'), rate=SAMPLE_RATE, data=audio_bytes_scaled)
+    f = sf.SoundFile(os.path.join(FOLDER_WAV_DB, file_name + '.wav'))
+    audio_file = open(os.path.join(FOLDER_WAV_DB, file_name + '.wav'), 'rb')
     st.audio(audio_file.read())
     st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
+    result = diarization(os.path.join(FOLDER_WAV_DB, file_name + '.wav'))
     with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
         transcript = f.read()
     st.write("Transcription completed.")
 uploaded_file = st.file_uploader("Choose your recording with a speech",
                                  accept_multiple_files=False, type=["mp3", "wav", "ogg"])
 if uploaded_file is not None:
+    os.makedirs(FOLDER_WAV_USER, exist_ok=True)
+    for f in glob.glob(FOLDER_WAV_USER + '*'):
         os.remove(f)
+    save_path = FOLDER_WAV_USER + uploaded_file.name
     if ".mp3" in uploaded_file:
         sound = AudioSegment.from_mp3(uploaded_file)
     elif ".ogg" in uploaded_file: