Files changed (1) hide show
  1. app.py +55 -114
app.py CHANGED
@@ -12,14 +12,14 @@ from pydub import AudioSegment
12
 
13
  def convert_yt_to_wav(url):
14
  if not url:
15
- return "Primero introduce el enlace del video", None
16
 
17
  try:
18
- print(f"Convirtiendo video {url}...")
19
- # Descargar el video utilizando pytube
20
  video = pytube.YouTube(url)
21
  stream = video.streams.filter(only_audio=True).first()
22
- video_output_folder = os.path.join(f"yt_videos") # Ruta de destino de la carpeta
23
  audio_output_folder = 'audios'
24
 
25
  print("Downloading video")
@@ -29,9 +29,9 @@ def convert_yt_to_wav(url):
29
  file_name = os.path.basename(video_file_path)
30
 
31
  audio_file_path = os.path.join(audio_output_folder, file_name.replace('.mp4','.wav'))
32
- # convert mp4 to wav
33
  print("Converting to wav")
34
- sound = AudioSegment.from_file(video_file_path,format="mp4")
35
  sound.export(audio_file_path, format="wav")
36
 
37
  if os.path.exists(video_file_path):
@@ -39,72 +39,72 @@ def convert_yt_to_wav(url):
39
 
40
  return "Success", audio_file_path
41
  except ConnectionResetError as cre:
42
- return "Se ha perdido la conexión, recarga o reintentalo nuevamente más tarde.", None
43
  except Exception as e:
44
  return str(e), None
45
 
46
  with gr.Blocks() as app:
47
  gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
48
 
49
- gr.HTML("<h4> El espacio actual usa solo cpu, así que es solo para inferencia. Se recomienda duplicar el espacio para no tener problemas con las colas de procesamiento. </h4>")
50
 
51
- gr.Markdown("Simple RVC GPU Inference on colab: [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/drive/1NKqqTR04HujeBxzwe7jbYEvNi8LbxD_N?usp=sharing)")
52
  gr.Markdown(
53
  "[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/juuxn/SimpleRVC?duplicate=true)\n\n"
54
  )
55
 
56
- gr.Markdown("Recopilación de modelos que puedes usar: RVC + Kits ai. **[RVC Community Models](https://docs.google.com/spreadsheets/d/1owfUtQuLW9ReiIwg6U9UkkDmPOTkuNHf0OKQtWu1iaI)**")
57
 
58
- with gr.Tab("Inferencia"):
59
- model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo", show_label=True)
60
  with gr.Row():
61
  with gr.Column():
62
- audio_path = gr.Audio(label="Archivo de audio", show_label=True, type="filepath",)
63
- index_rate = gr.Slider(minimum=0, maximum=1, label="Search feature ratio:", value=0.75, interactive=True,)
64
- filter_radius1 = gr.Slider(minimum=0, maximum=7, label="Filtro (reducción de asperezas respiración)", value=3, step=1, interactive=True,)
65
  with gr.Column():
66
  f0_method = gr.Dropdown(choices=["harvest", "pm", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"],
67
  value="rmvpe",
68
- label="Algoritmo", show_label=True)
69
- vc_transform0 = gr.Slider(minimum=-12, label="Número de semitonos, subir una octava: 12, bajar una octava: -12", value=0, maximum=12, step=1)
70
  protect0 = gr.Slider(
71
- minimum=0, maximum=0.5, label="Protejer las consonantes sordas y los sonidos respiratorios. 0.5 para desactivarlo.", value=0.33,
72
  step=0.01,
73
- interactive=True,
74
  )
75
  resample_sr1 = gr.Slider(
76
  minimum=0,
77
  maximum=48000,
78
- label="Re-muestreo sobre el audio de salida hasta la frecuencia de muestreo final. 0 para no re-muestrear.",
79
  value=0,
80
  step=1,
81
  interactive=True,
82
  )
83
 
84
- # Salida
85
  with gr.Row():
86
- vc_output1 = gr.Textbox(label="Salida")
87
- vc_output2 = gr.Audio(label="Audio de salida")
88
 
89
- btn = gr.Button(value="Convertir")
90
  btn.click(infer, inputs=[model_url, f0_method, audio_path, index_rate, vc_transform0, protect0, resample_sr1, filter_radius1], outputs=[vc_output1, vc_output2])
91
 
92
  with gr.TabItem("TTS"):
93
  with gr.Row():
94
  tts_text = gr.Textbox(
95
- label="Texto:",
96
- placeholder="Texto que deseas convertir a voz...",
97
  lines=6,
98
  )
99
 
100
  with gr.Column():
101
  with gr.Row():
102
- tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo RVC", show_label=True)
103
 
104
  with gr.Row():
105
- tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="Método TTS:", visible=True)
106
- tts_model = gr.Dropdown(choices=EDGE_VOICES, label="Modelo TTS:", visible=True, interactive=True)
107
- tts_api_key = gr.Textbox(label="ElevenLabs Api key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a",interactive=True, visible=False)
108
 
109
  tts_coqui_languages = gr.Radio(
110
  label="Language",
@@ -113,116 +113,57 @@ with gr.Blocks() as app:
113
  visible=False
114
  )
115
 
116
- tts_btn = gr.Button(value="Convertir")
117
 
118
  with gr.Row():
119
- tts_vc_output1 = gr.Textbox(label="Salida")
120
- tts_vc_output2 = gr.Audio(label="Audio de salida")
121
 
122
  tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key, tts_coqui_languages], outputs=[tts_vc_output1, tts_vc_output2])
123
 
124
- tts_msg = gr.Markdown("""**Recomiendo que te crees una cuenta de eleven labs y pongas tu clave de api, es gratis y tienes 10k caracteres de limite al mes.** <br/>
125
  ![Imgur](https://imgur.com/HH6YTu0.png)
126
  """, visible=False)
127
 
128
  tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages])
129
 
130
  with gr.TabItem("Youtube"):
131
- gr.Markdown("## Convertir video de Youtube a audio")
132
  with gr.Row():
133
  yt_url = gr.Textbox(
134
- label="Url del video:",
135
  placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q"
136
  )
137
- yt_btn = gr.Button(value="Convertir")
138
 
139
  with gr.Row():
140
- yt_output1 = gr.Textbox(label="Salida")
141
- yt_output2 = gr.Audio(label="Audio de salida")
142
 
143
  yt_btn.click(fn=convert_yt_to_wav, inputs=[yt_url], outputs=[yt_output1, yt_output2])
144
 
145
- # with gr.TabItem("Mejora de audio"):
146
- # enhance_input_audio = gr.Audio(label="Audio de entrada")
147
- # enhance_output_audio = gr.Audio(label="Audio de salida")
148
-
149
- # btn_enhance_audio = gr.Button()
150
- # # btn_enhance_audio.click(fn=audio_enhance, inputs=[enhance_input_audio], outputs=[enhance_output_audio])
151
-
152
-
153
- with gr.Tab("Modelos"):
154
- gr.HTML("<h4>Buscar modelos</h4>")
155
- search_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
156
- # Salida
157
  with gr.Row():
158
- sarch_output = gr.Markdown(label="Salida")
159
 
160
- btn_search_model = gr.Button(value="Buscar")
161
- btn_search_model.click(fn=search_model, inputs=[search_name], outputs=[sarch_output])
162
 
163
- gr.HTML("<h4>Publica tu modelo</h4>")
164
- post_name = gr.Textbox(placeholder="Billie Eillish (RVC v2 - 100 epoch)", label="Nombre", show_label=True)
165
- post_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Url del modelo", show_label=True)
166
- post_creator = gr.Textbox(placeholder="ID de discord o enlace al perfil del creador", label="Creador", show_label=True)
167
- post_version = gr.Dropdown(choices=["RVC v1", "RVC v2"], value="RVC v1", label="Versión", show_label=True)
168
 
169
- # Salida
170
  with gr.Row():
171
- post_output = gr.Markdown(label="Salida")
172
 
173
- btn_post_model = gr.Button(value="Publicar")
174
  btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
175
 
176
- # with gr.Column():
177
- # model_voice_path07 = gr.Dropdown(
178
- # label=i18n("RVC Model:"),
179
- # choices=sorted(names),
180
- # value=default_weight,
181
- # )
182
- # best_match_index_path1, _ = match_index(
183
- # model_voice_path07.value
184
- # )
185
-
186
- # file_index2_07 = gr.Dropdown(
187
- # label=i18n("Select the .index file:"),
188
- # choices=get_indexes(),
189
- # value=best_match_index_path1,
190
- # interactive=True,
191
- # allow_custom_value=True,
192
- # )
193
- # with gr.Row():
194
- # refresh_button_ = gr.Button(i18n("Refresh"), variant="primary")
195
- # refresh_button_.click(
196
- # fn=change_choices2,
197
- # inputs=[],
198
- # outputs=[model_voice_path07, file_index2_07],
199
- # )
200
- # with gr.Row():
201
- # original_ttsvoice = gr.Audio(label=i18n("Audio TTS:"))
202
- # ttsvoice = gr.Audio(label=i18n("Audio RVC:"))
203
-
204
- # with gr.Row():
205
- # button_test = gr.Button(i18n("Convert"), variant="primary")
206
-
207
- # button_test.click(
208
- # tts.use_tts,
209
- # inputs=[
210
- # text_test,
211
- # tts_test,
212
- # model_voice_path07,
213
- # file_index2_07,
214
- # # transpose_test,
215
- # vc_transform0,
216
- # f0method8,
217
- # index_rate1,
218
- # crepe_hop_length,
219
- # f0_autotune,
220
- # ttsmethod_test,
221
- # ],
222
- # outputs=[ttsvoice, original_ttsvoice],
223
- # )
224
-
225
-
226
-
227
  app.queue(concurrency_count=200, max_size=1022).launch()
228
- #share=True
 
12
 
13
  def convert_yt_to_wav(url):
14
  if not url:
15
+ return "Please enter the video link", None
16
 
17
  try:
18
+ print(f"Converting video {url}...")
19
+ # Download the video using pytube
20
  video = pytube.YouTube(url)
21
  stream = video.streams.filter(only_audio=True).first()
22
+ video_output_folder = os.path.join(f"yt_videos") # Destination folder path
23
  audio_output_folder = 'audios'
24
 
25
  print("Downloading video")
 
29
  file_name = os.path.basename(video_file_path)
30
 
31
  audio_file_path = os.path.join(audio_output_folder, file_name.replace('.mp4','.wav'))
32
+ # Convert mp4 to wav
33
  print("Converting to wav")
34
+ sound = AudioSegment.from_file(video_file_path, format="mp4")
35
  sound.export(audio_file_path, format="wav")
36
 
37
  if os.path.exists(video_file_path):
 
39
 
40
  return "Success", audio_file_path
41
  except ConnectionResetError as cre:
42
+ return "Connection lost, please refresh or try again later.", None
43
  except Exception as e:
44
  return str(e), None
45
 
46
  with gr.Blocks() as app:
47
  gr.HTML("<h1> Simple RVC Inference - by Juuxn 💻 </h1>")
48
 
49
+ gr.HTML("<h4> The current space uses only CPU, so it's only for inference. It is recommended to duplicate the space to avoid issues with processing queues. </h4>")
50
 
51
+ gr.Markdown("Simple RVC GPU Inference on Colab: [![Open In Colab](https://img.shields.io/badge/Colab-F9AB00?style=for-the-badge&logo=googlecolab&color=525252)](https://colab.research.google.com/drive/1NKqqTR04HujeBxzwe7jbYEvNi8LbxD_N?usp=sharing)")
52
  gr.Markdown(
53
  "[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm-dark.svg)](https://huggingface.co/spaces/juuxn/SimpleRVC?duplicate=true)\n\n"
54
  )
55
 
56
+ gr.Markdown("Collection of models you can use: RVC + AI Kits. **[RVC Community Models](https://docs.google.com/spreadsheets/d/1owfUtQuLW9ReiIwg6U9UkkDmPOTkuNHf0OKQtWu1iaI)**")
57
 
58
+ with gr.Tab("Inference"):
59
+ model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Model URL", show_label=True)
60
  with gr.Row():
61
  with gr.Column():
62
+ audio_path = gr.Audio(label="Audio File", show_label=True, type="filepath")
63
+ index_rate = gr.Slider(minimum=0, maximum=1, label="Search feature ratio:", value=0.75, interactive=True)
64
+ filter_radius1 = gr.Slider(minimum=0, maximum=7, label="Filter (breathing roughness reduction)", value=3, step=1, interactive=True)
65
  with gr.Column():
66
  f0_method = gr.Dropdown(choices=["harvest", "pm", "crepe", "crepe-tiny", "mangio-crepe", "mangio-crepe-tiny", "rmvpe"],
67
  value="rmvpe",
68
+ label="Algorithm", show_label=True)
69
+ vc_transform0 = gr.Slider(minimum=-12, label="Number of semitones, up an octave: 12, down an octave: -12", value=0, maximum=12, step=1)
70
  protect0 = gr.Slider(
71
+ minimum=0, maximum=0.5, label="Protect voiceless consonants and breathing sounds. 0.5 to disable.", value=0.33,
72
  step=0.01,
73
+ interactive=True,
74
  )
75
  resample_sr1 = gr.Slider(
76
  minimum=0,
77
  maximum=48000,
78
+ label="Resample the output audio to the final sampling rate. 0 for no resampling.",
79
  value=0,
80
  step=1,
81
  interactive=True,
82
  )
83
 
84
+ # Output
85
  with gr.Row():
86
+ vc_output1 = gr.Textbox(label="Output")
87
+ vc_output2 = gr.Audio(label="Output Audio")
88
 
89
+ btn = gr.Button(value="Convert")
90
  btn.click(infer, inputs=[model_url, f0_method, audio_path, index_rate, vc_transform0, protect0, resample_sr1, filter_radius1], outputs=[vc_output1, vc_output2])
91
 
92
  with gr.TabItem("TTS"):
93
  with gr.Row():
94
  tts_text = gr.Textbox(
95
+ label="Text:",
96
+ placeholder="Text you want to convert to speech...",
97
  lines=6,
98
  )
99
 
100
  with gr.Column():
101
  with gr.Row():
102
+ tts_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="RVC Model URL", show_label=True)
103
 
104
  with gr.Row():
105
+ tts_method = gr.Dropdown(choices=VOICE_METHODS, value="Edge-tts", label="TTS Method:", visible=True)
106
+ tts_model = gr.Dropdown(choices=EDGE_VOICES, label="TTS Model:", visible=True, interactive=True)
107
+ tts_api_key = gr.Textbox(label="ElevenLabs API Key", show_label=True, placeholder="4a4afce72349680c8e8b6fdcfaf2b65a", interactive=True, visible=False)
108
 
109
  tts_coqui_languages = gr.Radio(
110
  label="Language",
 
113
  visible=False
114
  )
115
 
116
+ tts_btn = gr.Button(value="Convert")
117
 
118
  with gr.Row():
119
+ tts_vc_output1 = gr.Textbox(label="Output")
120
+ tts_vc_output2 = gr.Audio(label="Output Audio")
121
 
122
  tts_btn.click(fn=tts_infer, inputs=[tts_text, tts_model_url, tts_method, tts_model, tts_api_key, tts_coqui_languages], outputs=[tts_vc_output1, tts_vc_output2])
123
 
124
+ tts_msg = gr.Markdown("""**I recommend creating an Eleven Labs account and entering your API key; it's free and you have a limit of 10k characters per month.** <br/>
125
  ![Imgur](https://imgur.com/HH6YTu0.png)
126
  """, visible=False)
127
 
128
  tts_method.change(fn=update_tts_methods_voice, inputs=[tts_method], outputs=[tts_model, tts_msg, tts_api_key, tts_coqui_languages])
129
 
130
  with gr.TabItem("Youtube"):
131
+ gr.Markdown("## Convert YouTube video to audio")
132
  with gr.Row():
133
  yt_url = gr.Textbox(
134
+ label="Video URL:",
135
  placeholder="https://www.youtube.com/watch?v=3vEiqil5d3Q"
136
  )
137
+ yt_btn = gr.Button(value="Convert")
138
 
139
  with gr.Row():
140
+ yt_output1 = gr.Textbox(label="Output")
141
+ yt_output2 = gr.Audio(label="Output Audio")
142
 
143
  yt_btn.click(fn=convert_yt_to_wav, inputs=[yt_url], outputs=[yt_output1, yt_output2])
144
 
145
+ with gr.Tab("Models"):
146
+ gr.HTML("<h4>Search models</h4>")
147
+ search_name = gr.Textbox(placeholder="Billie Eilish (RVC v2 - 100 epoch)", label="Name", show_label=True)
148
+ # Output
 
 
 
 
 
 
 
 
149
  with gr.Row():
150
+ search_output = gr.Markdown(label="Output")
151
 
152
+ btn_search_model = gr.Button(value="Search")
153
+ btn_search_model.click(fn=search_model, inputs=[search_name], outputs=[search_output])
154
 
155
+ gr.HTML("<h4>Submit your model</h4>")
156
+ post_name = gr.Textbox(placeholder="Billie Eilish (RVC v2 - 100 epoch)", label="Name", show_label=True)
157
+ post_model_url = gr.Textbox(placeholder="https://huggingface.co/AIVER-SE/BillieEilish/resolve/main/BillieEilish.zip", label="Model URL", show_label=True)
158
+ post_creator = gr.Textbox(placeholder="Discord ID or link to creator's profile", label="Creator", show_label=True)
159
+ post_version = gr.Dropdown(choices=["RVC v1", "RVC v2"], value="RVC v1", label="Version", show_label=True)
160
 
161
+ # Output
162
  with gr.Row():
163
+ post_output = gr.Markdown(label="Output")
164
 
165
+ btn_post_model = gr.Button(value="Post")
166
  btn_post_model.click(fn=post_model, inputs=[post_name, post_model_url, post_version, post_creator], outputs=[post_output])
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  app.queue(concurrency_count=200, max_size=1022).launch()
169
+ #share=True