Spaces:
Build error
Build error
pustozerov
committed on
Commit
•
8f18caf
1
Parent(s):
f508160
Database with examples was completely moved to the HuggingFace cloud.
Browse files
- .gitignore +1 -0
- app.py +17 -12
- requirements.txt +4 -2
.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
/data/user_data/
|
2 |
/info/transcripts/
|
|
|
|
1 |
/data/user_data/
|
2 |
/info/transcripts/
|
3 |
+
/info/configs/manifests/
|
app.py
CHANGED
@@ -1,30 +1,35 @@
|
|
1 |
import glob
|
2 |
import random
|
3 |
import os
|
|
|
4 |
import soundfile as sf
|
5 |
import streamlit as st
|
6 |
from pydub import AudioSegment
|
|
|
|
|
7 |
|
8 |
from modules.diarization.nemo_diarization import diarization
|
9 |
|
|
|
|
|
|
|
|
|
10 |
st.title('Call Transcription demo')
|
11 |
st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
|
12 |
'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
|
13 |
'pickup a set of images from the built-in database or try uploading your own files.')
|
|
|
14 |
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
audio_file
|
23 |
-
audio_bytes = audio_file.read()
|
24 |
-
st.audio(audio_bytes)
|
25 |
-
f = sf.SoundFile(chosen_files[0])
|
26 |
st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
|
27 |
-
result = diarization(
|
28 |
with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
|
29 |
transcript = f.read()
|
30 |
st.write("Transcription completed.")
|
|
|
1 |
import glob
|
2 |
import random
|
3 |
import os
|
4 |
+
import numpy as np
|
5 |
import soundfile as sf
|
6 |
import streamlit as st
|
7 |
from pydub import AudioSegment
|
8 |
+
from datasets import load_dataset
|
9 |
+
from scipy.io.wavfile import write
|
10 |
|
11 |
from modules.diarization.nemo_diarization import diarization
|
12 |
|
13 |
+
FOLDER_WAV = "data/user_data"
|
14 |
+
SAMPLE_RATE = 16000
|
15 |
+
dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
|
16 |
+
|
17 |
st.title('Call Transcription demo')
|
18 |
st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
|
19 |
'automatic speech recognition and diarization. It works with mp3, ogg and wav files. You can randomly '
|
20 |
'pickup a set of images from the built-in database or try uploading your own files.')
|
21 |
+
if st.button('Try a random sample from the database'):
|
22 |
|
23 |
+
shuffled_dataset = dataset.shuffle(seed=random.randint(0, 100))
|
24 |
+
file_name = str(shuffled_dataset["file"][0]).split(".")[0]
|
25 |
+
audio_bytes = np.array(shuffled_dataset["data"][0])
|
26 |
+
audio_bytes_scaled = np.int16(audio_bytes / np.max(np.abs(audio_bytes)) * 32767)
|
27 |
+
write(os.path.join(FOLDER_WAV, file_name + '.wav'), rate=SAMPLE_RATE, data=audio_bytes_scaled)
|
28 |
+
f = sf.SoundFile(os.path.join(FOLDER_WAV, file_name + '.wav'))
|
29 |
+
audio_file = open(os.path.join(FOLDER_WAV, file_name + '.wav'), 'rb')
|
30 |
+
st.audio(audio_file.read())
|
|
|
|
|
|
|
31 |
st.write("Starting transcription. Estimated processing time: %0.1f seconds" % (f.frames / (f.samplerate * 5)))
|
32 |
+
result = diarization(os.path.join(FOLDER_WAV, file_name + '.wav'))
|
33 |
with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
|
34 |
transcript = f.read()
|
35 |
st.write("Transcription completed.")
|
requirements.txt
CHANGED
@@ -33,10 +33,12 @@ sentencepiece==0.1.96
|
|
33 |
SoundFile==0.10.3.post1
|
34 |
spacy==3.4.0
|
35 |
speechbrain @ git+https://github.com/speechbrain/speechbrain.git
|
36 |
-
streamlit
|
37 |
torch==1.12.0
|
38 |
torchaudio==0.12.0
|
39 |
transformers==4.20.0
|
40 |
webdataset==0.1.62
|
41 |
Cython==0.29.14
|
42 |
-
youtokentome
|
|
|
|
|
|
33 |
SoundFile==0.10.3.post1
|
34 |
spacy==3.4.0
|
35 |
speechbrain @ git+https://github.com/speechbrain/speechbrain.git
|
36 |
+
streamlit~=1.11.1
|
37 |
torch==1.12.0
|
38 |
torchaudio==0.12.0
|
39 |
transformers==4.20.0
|
40 |
webdataset==0.1.62
|
41 |
Cython==0.29.14
|
42 |
+
youtokentome
|
43 |
+
datasets~=2.4.0
|
44 |
+
NEMO~=4.1.1
|