mj-new committed on
Commit d136bc2
1 Parent(s): 423e823

Improved voice collection app

Files changed (2)
  1. __pycache__/helpers.cpython-310.pyc +0 -0
  2. app.py +51 -29
__pycache__/helpers.cpython-310.pyc CHANGED
Binary files a/__pycache__/helpers.cpython-310.pyc and b/__pycache__/helpers.cpython-310.pyc differ
 
app.py CHANGED
@@ -12,7 +12,7 @@ import uuid
 from datetime import date,datetime
 from huggingface_hub import Repository, upload_file
 import shutil
-from helpers import dict_origin
+from helpers import dict_origin, dict_promptset

 HF_TOKEN_WRITE = os.environ.get("HF_TOKEN_WRITE")
 print("HF_TOKEN_WRITE", HF_TOKEN_WRITE)
@@ -44,16 +44,19 @@ def get_unique_name():
     return ''.join([random.choice(string.ascii_letters
                     + string.digits) for n in range(32)])

-def get_prompts(domain, type, size, language_code):
+def get_prompts(domain, size, language_code):
     print(f"Retrieving prompts for domain {domain} with method: {type} for language_code {language_code} of size {size}")
+    size = int(size)
+    promptset = dict_promptset[domain][0:size]

-    return(promptset[domain], promptset[domain][0])
+    return(promptset, promptset[0])

-def save_recording_and_meta(project_name, recording, transcript, language_code, spk_age, spk_accent, spk_city, spk_gender, spk_nativity, promptset, prompt_number):
+def save_recording_and_meta(project_name, recording, prompt_text, language_code, spk_name, spk_age, spk_accent, spk_city, spk_gender, spk_nativity, promptset, prompt_number):
     #, name, age, gender):
     # TODO save user data in the next version

     speaker_metadata={}
+    speaker_metadata['name'] = spk_name if spk_name !='' else 'unknown'
     speaker_metadata['gender'] = spk_gender if spk_gender !='' else 'unknown'
     speaker_metadata['age'] = spk_age if spk_age !='' else 'unknown'
     speaker_metadata['accent'] = spk_accent if spk_accent !='' else 'unknown'
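
The reworked get_prompts reads its prompt lists from a dict_promptset mapping imported from helpers; only the compiled helpers.cpython-310.pyc changes in this commit, so the actual mapping is not visible here. A minimal sketch of how such a mapping and the updated function could fit together, with all prompt strings purely illustrative:

# Hypothetical stand-in for the promptset mapping assumed to live in helpers.py;
# the real contents of that module are not part of this diff.
dict_promptset = {
    "bridge": [
        "Example prompt one.",
        "Example prompt two.",
        "Example prompt three.",
    ],
}

def get_prompts(domain, size, language_code):
    # Same flow as the updated app.py function: convert the requested size to an
    # int, slice that many prompts from the domain's list, and return the list
    # together with the first prompt to display.
    size = int(size)
    promptset = dict_promptset[domain][0:size]
    return promptset, promptset[0]

prompts, first_prompt = get_prompts("bridge", "2", "pl-PL")
print(prompts)        # ['Example prompt one.', 'Example prompt two.']
print(first_prompt)   # Example prompt one.
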
@@ -61,9 +64,9 @@ def save_recording_and_meta(project_name, recording, transcript, language_code,
     speaker_metadata['nativity'] = spk_nativity if spk_nativity !='' else 'unknown'

     # TODO get ISO-693-1 codes
-    transcript =transcript.strip()
+    prompt_text =prompt_text.strip()

-    SAVE_ROOT_DIR = os.path.join(LOCAL_DIR, project_name, today_ymd)
+    SAVE_ROOT_DIR = os.path.join(LOCAL_DIR, project_name, today_ymd, spk_name)

     SAVE_DIR_AUDIO = os.path.join(SAVE_ROOT_DIR, "audio")
     SAVE_DIR_META = os.path.join(SAVE_ROOT_DIR, "meta")
@@ -81,23 +84,29 @@ def save_recording_and_meta(project_name, recording, transcript, language_code,
     shutil.copy2(recording, audio_output_fp)

     # Write metadata.json to file
-    meta_fn = uuid_name + 'metadata.jsonl'
+    meta_fn = uuid_name + '.metadata.jsonl'
     json_file_path = os.path.join(SAVE_DIR_META, meta_fn)

     now = datetime.now()
     timestamp_str = now.strftime("%d/%m/%Y %H:%M:%S")
-    metadata= {'id':uuid_name,'audio_file': audio_fn,
-               'language_code':language_code,
-               'transcript':transcript,'age': speaker_metadata['age'],
-               'gender': speaker_metadata['gender'],'accent': speaker_metadata['accent'],
-               'nativity': speaker_metadata['nativity'],'city': speaker_metadata['city'],
-               "date":today_ymd, "timestamp": timestamp_str }
-
+    metadata= {'id':uuid_name,
+               'audio_file': audio_fn,
+               'language_code':language_code,
+               'prompt':prompt_text,
+               'name': speaker_metadata['name'],
+               'age': speaker_metadata['age'],
+               'gender': speaker_metadata['gender'],
+               'accent': speaker_metadata['accent'],
+               'nativity': speaker_metadata['nativity'],
+               'city': speaker_metadata['city'],
+               "date":today_ymd,
+               "timestamp": timestamp_str }
+
     dump_json(metadata, json_file_path)

     # Simply upload the audio file and metadata using the hub's upload_file
     # Upload the audio
-    repo_audio_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, "audio", audio_fn)
+    repo_audio_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, spk_name, "audio", audio_fn)

     _ = upload_file(path_or_fileobj = audio_output_fp,
                     path_in_repo = repo_audio_path,
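
The metadata record written by dump_json now carries the prompt text and the speaker's name alongside the existing fields. A hypothetical example of one record in the resulting *.metadata.jsonl file (every value below is invented for illustration; the real id comes from get_unique_name and the audio extension is not shown in the diff):

import json

# Illustrative record matching the keys built in save_recording_and_meta.
metadata = {
    "id": "a1b2c3",
    "audio_file": "a1b2c3.wav",   # assumed audio extension
    "language_code": "pl-PL",
    "prompt": "Example prompt one.",
    "name": "anna",
    "age": "20-29",
    "gender": "Female",
    "accent": "unknown",
    "nativity": "Polish",
    "city": "unknown",
    "date": "2023-06-01",
    "timestamp": "01/06/2023 12:34:56",
}
print(json.dumps(metadata, ensure_ascii=False))
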
@@ -107,7 +116,7 @@ def save_recording_and_meta(project_name, recording, transcript, language_code,
                     )

     # Upload the metadata
-    repo_json_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, "meta", meta_fn)
+    repo_json_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, spk_name, "meta", meta_fn)
     _ = upload_file(path_or_fileobj = json_file_path,
                     path_in_repo = repo_json_path,
                     repo_id = REPO_NAME,
@@ -117,9 +126,20 @@ def save_recording_and_meta(project_name, recording, transcript, language_code,

     output = print(f"Recording {audio_fn} and meta file {meta_fn} successfully saved to repo!")

-    # None resets the audio component
-    prompt_number = prompt_number + 1
-    prompt = promptset[prompt_number]
+    # check if prompt number is not set
+    # if set already - increment value
+    if prompt_number == None:
+        prompt_number = 1
+    else:
+        prompt_number = prompt_number + 1
+
+    # check if this is the last prompt
+    if prompt_number == len(promptset):
+        prompt_number = 0
+        prompt = "All prompts recorded. Thank you! You can close the app now:)"
+    else:
+        # get next prompt
+        prompt = promptset[prompt_number]

     return [prompt, prompt_number, None]

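
The prompt-advancing branch added above is easiest to check in isolation. A standalone sketch of the same logic (the function name and the sample promptset are illustrative, not part of app.py):

def next_prompt(prompt_number, promptset):
    # Mirrors the logic added to save_recording_and_meta: after the first saved
    # recording the app moves to the second prompt (index 1); later calls
    # advance by one.
    if prompt_number is None:
        prompt_number = 1
    else:
        prompt_number = prompt_number + 1

    # Once the index runs past the last prompt, reset it and show a closing message.
    if prompt_number == len(promptset):
        prompt_number = 0
        prompt = "All prompts recorded. Thank you! You can close the app now:)"
    else:
        prompt = promptset[prompt_number]
    return prompt, prompt_number

demo_promptset = ["prompt A", "prompt B", "prompt C"]
print(next_prompt(None, demo_promptset))  # ('prompt B', 1)
print(next_prompt(1, demo_promptset))     # ('prompt C', 2)
print(next_prompt(2, demo_promptset))     # closing message, index reset to 0
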
@@ -285,7 +305,7 @@ with block:
     gr.Markdown(markdown)

     with gr.Tabs():
-        with gr.TabItem('General settings'):
+        """with gr.TabItem('General settings'):
             radio_lang = gr.Radio(["Polish", "English"], label="Language", info="If none is selected, Polish is used")
             radio_asr_type = gr.Radio(["Local", "Cloud"], label="Select ASR type", info="Cloud models are faster and more accurate, but costs money")
             with gr.Accordion(label="Local ASR settings", open=False):
@@ -306,9 +326,10 @@
                 azure_api_key = gr.Textbox(label="", elem_id="pw")
             with gr.Accordion(label="Chat GPT settings",open=False):
                 slider_temp = gr.Slider(minimum=0, maximum= 2, step=0.2, label="ChatGPT temperature")
-
+        """
         with gr.TabItem('Speaker information'):
             with gr.Row():
+                spk_name = gr.Textbox(placeholder="Your name", label="Your name", info="Please provide your name")
                 dropdown_spk_nativity = gr.Dropdown(["Polish", "Other"], label="Your native language", info="")
                 dropdown_spk_gender = gr.Dropdown(["Male", "Female", "Other", "Prefer not to say"], label="Your gender", info="")
                 dropdown_spk_age = gr.Dropdown(["under 20", "20-29", "30-39", "40-49", "50-59", "over 60"], label="Your age range", info="")
@@ -319,7 +340,7 @@
             dropdown_spk_age.change(fn=change_age, inputs=dropdown_spk_age, outputs=spk_age)
             dropdown_spk_origin_city.change(fn=change_city, inputs=dropdown_spk_origin_city, outputs=spk_city)

-        with gr.TabItem('Voicebot playground'):
+        """with gr.TabItem('Voicebot playground'):
             mic_recording = gr.Audio(source="microphone", type="filepath", label='Record your voice')
             with gr.Row():
                 button_transcribe = gr.Button("Transcribe speech")
@@ -353,12 +374,12 @@

             radio_lang.change(fn=change_language, inputs=radio_lang, outputs=language_code)
             radio_whisper_model.change(fn=change_whisper_model, inputs=radio_whisper_model, outputs=[whisper_model_type, whisper_model])
+        """
         with gr.TabItem('Batch audio collection'):

-
             with gr.Accordion(label="Promptset settings"):
-                radio_prompts_domain = gr.Dropdown(["Bridge"], label="Select promptset domain", info="")
-                radio_promptset_type = gr.Radio(["New promptset generation", "Existing promptset use"], label="Language", value ="Existing promptset use", info="New promptset is generated using. Requires providing open AI key in general settings tab")
+                radio_prompts_domain = gr.Dropdown(["bridge"], label="Select promptset domain", info="")
+                #radio_promptset_type = gr.Radio(["New promptset generation", "Existing promptset use"], label="Language", value ="Existing promptset use", info="New promptset is generated using. Requires providing open AI key in general settings tab")
                 var_promptset_size = gr.Textbox(label="Specify number of prompts (min 10, max 200)")
                 button_get_prompts = gr.Button("Save settings and get first prompt to record")

@@ -366,11 +387,12 @@
             speech_recording = gr.Audio(source="microphone",label="Select 'record from microphone' and read prompt displayed above", type="filepath")

             radio_prompts_domain.change(fn=change_domain, inputs=radio_prompts_domain, outputs=domain)
-            radio_promptset_type.change(fn=change_prompts_type, inputs=radio_promptset_type, outputs=prompts_type)
+            #radio_promptset_type.change(fn=change_prompts_type, inputs=radio_promptset_type, outputs=prompts_type)

             button_save_and_next = gr.Button("Save audio recording and move to the next prompt")
-            button_get_prompts.click(get_prompts, inputs=[radio_prompts_domain, radio_promptset_type, var_promptset_size, language_code], outputs = [promptset, prompt_text])
-
-            button_save_and_next.click(save_recording_and_meta, inputs=[project_name, speech_recording, prompt_text, language_code, spk_age, spk_accent, spk_city, spk_gender, spk_nativity, promptset, prompt_number], outputs=[prompt_text, prompt_number, speech_recording])
+            # TODO - add option to generate new promptset on the fly for new domains
+            button_get_prompts.click(get_prompts, inputs=[radio_prompts_domain, var_promptset_size, language_code], outputs = [promptset, prompt_text])
+
+            button_save_and_next.click(save_recording_and_meta, inputs=[project_name, speech_recording, prompt_text, language_code, spk_name, spk_age, spk_accent, spk_city, spk_gender, spk_nativity, promptset, prompt_number], outputs=[prompt_text, prompt_number, speech_recording])

 block.launch()
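
With spk_name added to both the local save path and the repo paths, each speaker's recordings and metadata land in their own subfolder of the dataset repository. A quick sketch of the resulting paths; all values below are illustrative stand-ins, since REPOSITORY_DIR, project_name, the date format and the audio filename are defined elsewhere in app.py and not shown in this diff:

import os

# Illustrative values only.
REPOSITORY_DIR = "data"
project_name = "voice-collection"
today_ymd = "2023-06-01"
spk_name = "anna"
audio_fn = "a1b2c3.wav"                 # assumed audio extension
meta_fn = "a1b2c3.metadata.jsonl"

repo_audio_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, spk_name, "audio", audio_fn)
repo_json_path = os.path.join(REPOSITORY_DIR, project_name, today_ymd, spk_name, "meta", meta_fn)
print(repo_audio_path)  # data/voice-collection/2023-06-01/anna/audio/a1b2c3.wav
print(repo_json_path)   # data/voice-collection/2023-06-01/anna/meta/a1b2c3.metadata.jsonl
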
 