pustozerov committed on
Commit
516b73f
1 Parent(s): 8f18caf

Separated paths to user and DB data.

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -10,7 +10,8 @@ from scipy.io.wavfile import write
10
 
11
  from modules.diarization.nemo_diarization import diarization
12
 
13
- FOLDER_WAV = "data/user_data"
 
14
  SAMPLE_RATE = 16000
15
  dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
16
 
@@ -19,17 +20,17 @@ st.subheader('This simple demo shows the possibilities of the ASR and NLP in the
19
  'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
20
  'pickup a set of images from the built-in database or try uploading your own files.')
21
  if st.button('Try a random sample from the database'):
22
-
23
  shuffled_dataset = dataset.shuffle(seed=random.randint(0, 100))
24
  file_name = str(shuffled_dataset["file"][0]).split(".")[0]
25
  audio_bytes = np.array(shuffled_dataset["data"][0])
26
  audio_bytes_scaled = np.int16(audio_bytes / np.max(np.abs(audio_bytes)) * 32767)
27
- write(os.path.join(FOLDER_WAV, file_name + '.wav'), rate=SAMPLE_RATE, data=audio_bytes_scaled)
28
- f = sf.SoundFile(os.path.join(FOLDER_WAV, file_name + '.wav'))
29
- audio_file = open(os.path.join(FOLDER_WAV, file_name + '.wav'), 'rb')
30
  st.audio(audio_file.read())
31
  st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
32
- result = diarization(os.path.join(FOLDER_WAV, file_name + '.wav'))
33
  with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
34
  transcript = f.read()
35
  st.write("Transcription completed.")
@@ -46,11 +47,10 @@ if st.button('Try a random sample from the database'):
46
  uploaded_file = st.file_uploader("Choose your recording with a speech",
47
  accept_multiple_files=False, type=["mp3", "wav", "ogg"])
48
  if uploaded_file is not None:
49
- folder = "data/user_data/"
50
- os.makedirs(folder, exist_ok=True)
51
- for f in glob.glob(folder + '*'):
52
  os.remove(f)
53
- save_path = folder + uploaded_file.name
54
  if ".mp3" in uploaded_file:
55
  sound = AudioSegment.from_mp3(uploaded_file)
56
  elif ".ogg" in uploaded_file:
 
10
 
11
  from modules.diarization.nemo_diarization import diarization
12
 
13
+ FOLDER_WAV_DB = "data/database/"
14
+ FOLDER_WAV_USER = "data/user_data/"
15
  SAMPLE_RATE = 16000
16
  dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
17
 
 
20
  'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
21
  'pickup a set of images from the built-in database or try uploading your own files.')
22
  if st.button('Try a random sample from the database'):
23
+ os.makedirs(FOLDER_WAV_DB, exist_ok=True)
24
  shuffled_dataset = dataset.shuffle(seed=random.randint(0, 100))
25
  file_name = str(shuffled_dataset["file"][0]).split(".")[0]
26
  audio_bytes = np.array(shuffled_dataset["data"][0])
27
  audio_bytes_scaled = np.int16(audio_bytes / np.max(np.abs(audio_bytes)) * 32767)
28
+ write(os.path.join(FOLDER_WAV_DB, file_name + '.wav'), rate=SAMPLE_RATE, data=audio_bytes_scaled)
29
+ f = sf.SoundFile(os.path.join(FOLDER_WAV_DB, file_name + '.wav'))
30
+ audio_file = open(os.path.join(FOLDER_WAV_DB, file_name + '.wav'), 'rb')
31
  st.audio(audio_file.read())
32
  st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
33
+ result = diarization(os.path.join(FOLDER_WAV_DB, file_name + '.wav'))
34
  with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
35
  transcript = f.read()
36
  st.write("Transcription completed.")
 
47
  uploaded_file = st.file_uploader("Choose your recording with a speech",
48
  accept_multiple_files=False, type=["mp3", "wav", "ogg"])
49
  if uploaded_file is not None:
50
+ os.makedirs(FOLDER_WAV_USER, exist_ok=True)
51
+ for f in glob.glob(FOLDER_WAV_USER + '*'):
 
52
  os.remove(f)
53
+ save_path = FOLDER_WAV_USER + uploaded_file.name
54
  if ".mp3" in uploaded_file:
55
  sound = AudioSegment.from_mp3(uploaded_file)
56
  elif ".ogg" in uploaded_file: