Spaces:
Sleeping
Sleeping
mj-new
committed on
Commit
•
d136bc2
1
Parent(s):
423e823
Improved voice collection app
Browse files
- __pycache__/helpers.cpython-310.pyc +0 -0
- app.py +51 -29
__pycache__/helpers.cpython-310.pyc
CHANGED
Binary files a/__pycache__/helpers.cpython-310.pyc and b/__pycache__/helpers.cpython-310.pyc differ
|
|
app.py
CHANGED
@@ -12,7 +12,7 @@ import uuid
|
|
12 |
from datetime import date,datetime
|
13 |
from huggingface_hub import Repository, upload_file
|
14 |
import shutil
|
15 |
-
from helpers import dict_origin
|
16 |
|
17 |
HF_TOKEN_WRITE = os.environ.get("HF_TOKEN_WRITE")
|
18 |
print("HF_TOKEN_WRITE", HF_TOKEN_WRITE)
|
@@ -44,16 +44,19 @@ def get_unique_name():
|
|
44 |
return ''.join([random.choice(string.ascii_letters
|
45 |
+ string.digits) for n in range(32)])
|
46 |
|
47 |
-
def get_prompts(domain,
|
48 |
print(f"Retrieving prompts for domain {domain} with method: {type} for language_code {language_code} of size {size}")
|
|
|
|
|
49 |
|
50 |
-
return(promptset
|
51 |
|
52 |
-
def save_recording_and_meta(project_name, recording,
|
53 |
#, name, age, gender):
|
54 |
# TODO save user data in the next version
|
55 |
|
56 |
speaker_metadata={}
|
|
|
57 |
speaker_metadata['gender'] = spk_gender if spk_gender !='' else 'unknown'
|
58 |
speaker_metadata['age'] = spk_age if spk_age !='' else 'unknown'
|
59 |
speaker_metadata['accent'] = spk_accent if spk_accent !='' else 'unknown'
|
@@ -61,9 +64,9 @@ def save_recording_and_meta(project_name, recording, transcript, language_code,
|
|
61 |
speaker_metadata['nativity'] = spk_nativity if spk_nativity !='' else 'unknown'
|
62 |
|
63 |
# TODO get ISO 639-1 codes
|
64 |
-
|
65 |
|
66 |
-
SAVE_ROOT_DIR = os.path.join(LOCAL_DIR, project_name, today_ymd)
|
67 |
|
68 |
SAVE_DIR_AUDIO = os.path.join(SAVE_ROOT_DIR, "audio")
|
69 |
SAVE_DIR_META = os.path.join(SAVE_ROOT_DIR, "meta")
|
@@ -81,23 +84,29 @@ def save_recording_and_meta(project_name, recording, transcript, language_code,
|
|
81 |
shutil.copy2(recording, audio_output_fp)
|
82 |
|
83 |
# Write metadata.json to file
|
84 |
-
meta_fn = uuid_name + 'metadata.jsonl'
|
85 |
json_file_path = os.path.join(SAVE_DIR_META, meta_fn)
|
86 |
|
87 |
now = datetime.now()
|
88 |
timestamp_str = now.strftime("%d/%m/%Y %H:%M:%S")
|
89 |
-
metadata= {'id':uuid_name,
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
dump_json(metadata, json_file_path)
|
97 |
|
98 |
# Simply upload the audio file and metadata using the hub's upload_file
|
99 |
# Upload the audio
|
100 |
-
repo_audio_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, "audio", audio_fn)
|
101 |
|
102 |
_ = upload_file(path_or_fileobj = audio_output_fp,
|
103 |
path_in_repo = repo_audio_path,
|
@@ -107,7 +116,7 @@ def save_recording_and_meta(project_name, recording, transcript, language_code,
|
|
107 |
)
|
108 |
|
109 |
# Upload the metadata
|
110 |
-
repo_json_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, "meta", meta_fn)
|
111 |
_ = upload_file(path_or_fileobj = json_file_path,
|
112 |
path_in_repo = repo_json_path,
|
113 |
repo_id = REPO_NAME,
|
@@ -117,9 +126,20 @@ def save_recording_and_meta(project_name, recording, transcript, language_code,
|
|
117 |
|
118 |
output = print(f"Recording {audio_fn} and meta file {meta_fn} successfully saved to repo!")
|
119 |
|
120 |
-
#
|
121 |
-
|
122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
123 |
|
124 |
return [prompt, prompt_number, None]
|
125 |
|
@@ -285,7 +305,7 @@ with block:
|
|
285 |
gr.Markdown(markdown)
|
286 |
|
287 |
with gr.Tabs():
|
288 |
-
with gr.TabItem('General settings'):
|
289 |
radio_lang = gr.Radio(["Polish", "English"], label="Language", info="If none is selected, Polish is used")
|
290 |
radio_asr_type = gr.Radio(["Local", "Cloud"], label="Select ASR type", info="Cloud models are faster and more accurate, but costs money")
|
291 |
with gr.Accordion(label="Local ASR settings", open=False):
|
@@ -306,9 +326,10 @@ with block:
|
|
306 |
azure_api_key = gr.Textbox(label="", elem_id="pw")
|
307 |
with gr.Accordion(label="Chat GPT settings",open=False):
|
308 |
slider_temp = gr.Slider(minimum=0, maximum= 2, step=0.2, label="ChatGPT temperature")
|
309 |
-
|
310 |
with gr.TabItem('Speaker information'):
|
311 |
with gr.Row():
|
|
|
312 |
dropdown_spk_nativity = gr.Dropdown(["Polish", "Other"], label="Your native language", info="")
|
313 |
dropdown_spk_gender = gr.Dropdown(["Male", "Female", "Other", "Prefer not to say"], label="Your gender", info="")
|
314 |
dropdown_spk_age = gr.Dropdown(["under 20", "20-29", "30-39", "40-49", "50-59", "over 60"], label="Your age range", info="")
|
@@ -319,7 +340,7 @@ with block:
|
|
319 |
dropdown_spk_age.change(fn=change_age, inputs=dropdown_spk_age, outputs=spk_age)
|
320 |
dropdown_spk_origin_city.change(fn=change_city, inputs=dropdown_spk_origin_city, outputs=spk_city)
|
321 |
|
322 |
-
with gr.TabItem('Voicebot playground'):
|
323 |
mic_recording = gr.Audio(source="microphone", type="filepath", label='Record your voice')
|
324 |
with gr.Row():
|
325 |
button_transcribe = gr.Button("Transcribe speech")
|
@@ -353,12 +374,12 @@ with block:
|
|
353 |
|
354 |
radio_lang.change(fn=change_language, inputs=radio_lang, outputs=language_code)
|
355 |
radio_whisper_model.change(fn=change_whisper_model, inputs=radio_whisper_model, outputs=[whisper_model_type, whisper_model])
|
|
|
356 |
with gr.TabItem('Batch audio collection'):
|
357 |
|
358 |
-
|
359 |
with gr.Accordion(label="Promptset settings"):
|
360 |
-
radio_prompts_domain = gr.Dropdown(["
|
361 |
-
radio_promptset_type = gr.Radio(["New promptset generation", "Existing promptset use"], label="Language", value ="Existing promptset use", info="New promptset is generated using. Requires providing open AI key in general settings tab")
|
362 |
var_promptset_size = gr.Textbox(label="Specify number of prompts (min 10, max 200)")
|
363 |
button_get_prompts = gr.Button("Save settings and get first prompt to record")
|
364 |
|
@@ -366,11 +387,12 @@ with block:
|
|
366 |
speech_recording = gr.Audio(source="microphone",label="Select 'record from microphone' and read prompt displayed above", type="filepath")
|
367 |
|
368 |
radio_prompts_domain.change(fn=change_domain, inputs=radio_prompts_domain, outputs=domain)
|
369 |
-
radio_promptset_type.change(fn=change_prompts_type, inputs=radio_promptset_type, outputs=prompts_type)
|
370 |
|
371 |
button_save_and_next = gr.Button("Save audio recording and move to the next prompt")
|
372 |
-
|
373 |
-
|
374 |
-
|
|
|
375 |
|
376 |
block.launch()
|
|
|
12 |
from datetime import date,datetime
|
13 |
from huggingface_hub import Repository, upload_file
|
14 |
import shutil
|
15 |
+
from helpers import dict_origin, dict_promptset
|
16 |
|
17 |
HF_TOKEN_WRITE = os.environ.get("HF_TOKEN_WRITE")
|
18 |
print("HF_TOKEN_WRITE", HF_TOKEN_WRITE)
|
|
|
44 |
return ''.join([random.choice(string.ascii_letters
|
45 |
+ string.digits) for n in range(32)])
|
46 |
|
47 |
+
def get_prompts(domain, size, language_code):
|
48 |
print(f"Retrieving prompts for domain {domain} with method: {type} for language_code {language_code} of size {size}")
|
49 |
+
size = int(size)
|
50 |
+
promptset = dict_promptset[domain][0:size]
|
51 |
|
52 |
+
return(promptset, promptset[0])
|
53 |
|
54 |
+
def save_recording_and_meta(project_name, recording, prompt_text, language_code, spk_name, spk_age, spk_accent, spk_city, spk_gender, spk_nativity, promptset, prompt_number):
|
55 |
#, name, age, gender):
|
56 |
# TODO save user data in the next version
|
57 |
|
58 |
speaker_metadata={}
|
59 |
+
speaker_metadata['name'] = spk_name if spk_name !='' else 'unknown'
|
60 |
speaker_metadata['gender'] = spk_gender if spk_gender !='' else 'unknown'
|
61 |
speaker_metadata['age'] = spk_age if spk_age !='' else 'unknown'
|
62 |
speaker_metadata['accent'] = spk_accent if spk_accent !='' else 'unknown'
|
|
|
64 |
speaker_metadata['nativity'] = spk_nativity if spk_nativity !='' else 'unknown'
|
65 |
|
66 |
# TODO get ISO 639-1 codes
|
67 |
+
prompt_text =prompt_text.strip()
|
68 |
|
69 |
+
SAVE_ROOT_DIR = os.path.join(LOCAL_DIR, project_name, today_ymd, spk_name)
|
70 |
|
71 |
SAVE_DIR_AUDIO = os.path.join(SAVE_ROOT_DIR, "audio")
|
72 |
SAVE_DIR_META = os.path.join(SAVE_ROOT_DIR, "meta")
|
|
|
84 |
shutil.copy2(recording, audio_output_fp)
|
85 |
|
86 |
# Write metadata.json to file
|
87 |
+
meta_fn = uuid_name + '.metadata.jsonl'
|
88 |
json_file_path = os.path.join(SAVE_DIR_META, meta_fn)
|
89 |
|
90 |
now = datetime.now()
|
91 |
timestamp_str = now.strftime("%d/%m/%Y %H:%M:%S")
|
92 |
+
metadata= {'id':uuid_name,
|
93 |
+
'audio_file': audio_fn,
|
94 |
+
'language_code':language_code,
|
95 |
+
'prompt':prompt_text,
|
96 |
+
'name': speaker_metadata['name'],
|
97 |
+
'age': speaker_metadata['age'],
|
98 |
+
'gender': speaker_metadata['gender'],
|
99 |
+
'accent': speaker_metadata['accent'],
|
100 |
+
'nativity': speaker_metadata['nativity'],
|
101 |
+
'city': speaker_metadata['city'],
|
102 |
+
"date":today_ymd,
|
103 |
+
"timestamp": timestamp_str }
|
104 |
+
|
105 |
dump_json(metadata, json_file_path)
|
106 |
|
107 |
# Simply upload the audio file and metadata using the hub's upload_file
|
108 |
# Upload the audio
|
109 |
+
repo_audio_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, spk_name, "audio", audio_fn)
|
110 |
|
111 |
_ = upload_file(path_or_fileobj = audio_output_fp,
|
112 |
path_in_repo = repo_audio_path,
|
|
|
116 |
)
|
117 |
|
118 |
# Upload the metadata
|
119 |
+
repo_json_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, spk_name, "meta", meta_fn)
|
120 |
_ = upload_file(path_or_fileobj = json_file_path,
|
121 |
path_in_repo = repo_json_path,
|
122 |
repo_id = REPO_NAME,
|
|
|
126 |
|
127 |
output = print(f"Recording {audio_fn} and meta file {meta_fn} successfully saved to repo!")
|
128 |
|
129 |
+
# check if prompt number is not set
|
130 |
+
# if set already - increment value
|
131 |
+
if prompt_number == None:
|
132 |
+
prompt_number = 1
|
133 |
+
else:
|
134 |
+
prompt_number = prompt_number + 1
|
135 |
+
|
136 |
+
# check if this is the last prompt
|
137 |
+
if prompt_number == len(promptset):
|
138 |
+
prompt_number = 0
|
139 |
+
prompt = "All prompts recorded. Thank you! You can close the app now:)"
|
140 |
+
else:
|
141 |
+
# get next prompt
|
142 |
+
prompt = promptset[prompt_number]
|
143 |
|
144 |
return [prompt, prompt_number, None]
|
145 |
|
|
|
305 |
gr.Markdown(markdown)
|
306 |
|
307 |
with gr.Tabs():
|
308 |
+
"""with gr.TabItem('General settings'):
|
309 |
radio_lang = gr.Radio(["Polish", "English"], label="Language", info="If none is selected, Polish is used")
|
310 |
radio_asr_type = gr.Radio(["Local", "Cloud"], label="Select ASR type", info="Cloud models are faster and more accurate, but costs money")
|
311 |
with gr.Accordion(label="Local ASR settings", open=False):
|
|
|
326 |
azure_api_key = gr.Textbox(label="", elem_id="pw")
|
327 |
with gr.Accordion(label="Chat GPT settings",open=False):
|
328 |
slider_temp = gr.Slider(minimum=0, maximum= 2, step=0.2, label="ChatGPT temperature")
|
329 |
+
"""
|
330 |
with gr.TabItem('Speaker information'):
|
331 |
with gr.Row():
|
332 |
+
spk_name = gr.Textbox(placeholder="Your name", label="Your name", info="Please provide your name")
|
333 |
dropdown_spk_nativity = gr.Dropdown(["Polish", "Other"], label="Your native language", info="")
|
334 |
dropdown_spk_gender = gr.Dropdown(["Male", "Female", "Other", "Prefer not to say"], label="Your gender", info="")
|
335 |
dropdown_spk_age = gr.Dropdown(["under 20", "20-29", "30-39", "40-49", "50-59", "over 60"], label="Your age range", info="")
|
|
|
340 |
dropdown_spk_age.change(fn=change_age, inputs=dropdown_spk_age, outputs=spk_age)
|
341 |
dropdown_spk_origin_city.change(fn=change_city, inputs=dropdown_spk_origin_city, outputs=spk_city)
|
342 |
|
343 |
+
"""with gr.TabItem('Voicebot playground'):
|
344 |
mic_recording = gr.Audio(source="microphone", type="filepath", label='Record your voice')
|
345 |
with gr.Row():
|
346 |
button_transcribe = gr.Button("Transcribe speech")
|
|
|
374 |
|
375 |
radio_lang.change(fn=change_language, inputs=radio_lang, outputs=language_code)
|
376 |
radio_whisper_model.change(fn=change_whisper_model, inputs=radio_whisper_model, outputs=[whisper_model_type, whisper_model])
|
377 |
+
"""
|
378 |
with gr.TabItem('Batch audio collection'):
|
379 |
|
|
|
380 |
with gr.Accordion(label="Promptset settings"):
|
381 |
+
radio_prompts_domain = gr.Dropdown(["bridge"], label="Select promptset domain", info="")
|
382 |
+
#radio_promptset_type = gr.Radio(["New promptset generation", "Existing promptset use"], label="Language", value ="Existing promptset use", info="New promptset is generated using. Requires providing open AI key in general settings tab")
|
383 |
var_promptset_size = gr.Textbox(label="Specify number of prompts (min 10, max 200)")
|
384 |
button_get_prompts = gr.Button("Save settings and get first prompt to record")
|
385 |
|
|
|
387 |
speech_recording = gr.Audio(source="microphone",label="Select 'record from microphone' and read prompt displayed above", type="filepath")
|
388 |
|
389 |
radio_prompts_domain.change(fn=change_domain, inputs=radio_prompts_domain, outputs=domain)
|
390 |
+
#radio_promptset_type.change(fn=change_prompts_type, inputs=radio_promptset_type, outputs=prompts_type)
|
391 |
|
392 |
button_save_and_next = gr.Button("Save audio recording and move to the next prompt")
|
393 |
+
# TODO - add option to generate new promptset on the fly for new domains
|
394 |
+
button_get_prompts.click(get_prompts, inputs=[radio_prompts_domain, var_promptset_size, language_code], outputs = [promptset, prompt_text])
|
395 |
+
|
396 |
+
button_save_and_next.click(save_recording_and_meta, inputs=[project_name, speech_recording, prompt_text, language_code, spk_name, spk_age, spk_accent, spk_city, spk_gender, spk_nativity, promptset, prompt_number], outputs=[prompt_text, prompt_number, speech_recording])
|
397 |
|
398 |
block.launch()
|