Spaces:
Build error
Build error
pustozerov
committed on
Commit
•
ece7272
1
Parent(s):
4652f5c
Added auto-creation of manifests folder. Updated packages.txt.
Browse files
- app.py +12 -1
- packages.txt +1 -6
app.py
CHANGED
@@ -14,8 +14,10 @@ from modules.nlp.nemo_punct_cap import punctuation_capitalization
|
|
14 |
FOLDER_WAV_DB = "data/database/"
|
15 |
FOLDER_USER_DATA = "data/user_data/"
|
16 |
FOLDER_USER_DATA_WAV = "data/user_data_wav/"
|
|
|
17 |
SAMPLE_RATE = 16000
|
18 |
dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
|
|
|
19 |
|
20 |
st.title('Call Transcription demo')
|
21 |
st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
|
@@ -79,10 +81,19 @@ if uploaded_file is not None:
|
|
79 |
result = diarization(save_path)
|
80 |
with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
|
81 |
transcript = f.read()
|
82 |
-
st.write("Transcription completed.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
|
84 |
st.write("Sentences: %s" % len(result[file_name]["sentences"]))
|
85 |
st.write("Words: %s" % len(result[file_name]["words"]))
|
|
|
86 |
st.download_button(
|
87 |
label="Download audio transcript",
|
88 |
data=transcript,
|
|
|
14 |
FOLDER_WAV_DB = "data/database/"
|
15 |
FOLDER_USER_DATA = "data/user_data/"
|
16 |
FOLDER_USER_DATA_WAV = "data/user_data_wav/"
|
17 |
+
FOLDER_MANIFESTS = "info/configs/manifests/"
|
18 |
SAMPLE_RATE = 16000
|
19 |
dataset = load_dataset("pustozerov/crema_d_diarization", split='validation')
|
20 |
+
os.makedirs(FOLDER_WAV_DB, exist_ok=True)
|
21 |
|
22 |
st.title('Call Transcription demo')
|
23 |
st.subheader('This simple demo shows the possibilities of the ASR and NLP in the task of '
|
|
|
81 |
result = diarization(save_path)
|
82 |
with open("info/transcripts/pred_rttms/" + file_name + ".txt") as f:
|
83 |
transcript = f.read()
|
84 |
+
st.write("Transcription completed. Starting assigning punctuation and capitalization.")
|
85 |
+
sentences = result[file_name]["sentences"]
|
86 |
+
all_strings = ""
|
87 |
+
for sentence in sentences:
|
88 |
+
all_strings = all_strings + sentence["sentence"] + "\n"
|
89 |
+
all_strings = punctuation_capitalization([all_strings])[0]
|
90 |
+
st.write("Punctuation and capitalization are ready. Starting named entity recognition.")
|
91 |
+
tagged_string, tags_summary = detect_ner(all_strings)
|
92 |
+
transcript = transcript + '\n' + tagged_string
|
93 |
st.write("Number of speakers: %s" % result[file_name]["speaker_count"])
|
94 |
st.write("Sentences: %s" % len(result[file_name]["sentences"]))
|
95 |
st.write("Words: %s" % len(result[file_name]["words"]))
|
96 |
+
st.write("Found named entities: %s" % tags_summary)
|
97 |
st.download_button(
|
98 |
label="Download audio transcript",
|
99 |
data=transcript,
|
packages.txt
CHANGED
@@ -6,9 +6,4 @@ python3-opencv
|
|
6 |
unzip
|
7 |
libc6
|
8 |
libsm6
|
9 |
-
libxext6
|
10 |
-
libxcb-xinerama0
|
11 |
-
wget
|
12 |
-
libglfw3-dev
|
13 |
-
libgles2-mesa-dev
|
14 |
-
xvfb
|
|
|
6 |
unzip
|
7 |
libc6
|
8 |
libsm6
|
9 |
+
libxext6
|
|
|
|
|
|
|
|
|
|