kevinwang676 committed on
Commit
53bed0d
1 Parent(s): 4e3a496

Update app.py

Files changed (1)
  1. app.py +199 -0
app.py CHANGED
@@ -18,6 +18,11 @@ import torch
 import librosa
 import util
 
+import matplotlib.pyplot as plt
+from PIL import Image, ImageDraw, ImageFont
+from moviepy.editor import *
+from moviepy.video.io.VideoFileClip import VideoFileClip
+
 from config import device
 from infer_pack.models import (
     SynthesizerTrnMs256NSFsid,
@@ -99,6 +104,186 @@ for model_name in config_json.get('models'):
     ))
 print(f'Models loaded: {len(loaded_models)}')
 
+def make_bars_image(height_values, index, new_height):
+
+    # Define the size of the image
+    width = 512
+    height = new_height
+
+    # Create a new image with a transparent background
+    image = Image.new('RGBA', (width, height), color=(0, 0, 0, 0))
+
+    # Get the image drawing context
+    draw = ImageDraw.Draw(image)
+
+    # Define the rectangle width and spacing
+    rect_width = 2
+    spacing = 2
+
+    # Define the list of height values for the rectangles
+    #height_values = [20, 40, 60, 80, 100, 80, 60, 40]
+    num_bars = len(height_values)
+    # Calculate the total width of the rectangles and the spacing
+    total_width = num_bars * rect_width + (num_bars - 1) * spacing
+
+    # Calculate the starting position for the first rectangle
+    start_x = int((width - total_width) / 2)
+    # Define the buffer size
+    buffer_size = 80
+    # Draw the rectangles from left to right
+    x = start_x
+    for i, height in enumerate(height_values):
+
+        # Define the rectangle coordinates
+        y0 = buffer_size
+        y1 = height + buffer_size
+        x0 = x
+        x1 = x + rect_width
+
+        # Draw the rectangle
+        draw.rectangle([x0, y0, x1, y1], fill='white')
+
+        # Move to the next rectangle position
+        if i < num_bars - 1:
+            x += rect_width + spacing
+
+
+    # Rotate the image by 180 degrees
+    image = image.rotate(180)
+
+    # Mirror the image
+    image = image.transpose(Image.FLIP_LEFT_RIGHT)
+
+    # Save the image
+    image.save('audio_bars_'+ str(index) + '.png')
+
+    return 'audio_bars_'+ str(index) + '.png'
+
+def db_to_height(db_value):
+    # Scale the dB value to a range between 0 and 1
+    scaled_value = (db_value + 80) / 80
+
+    # Convert the scaled value to a height between 0 and 50
+    height = scaled_value * 50
+
+    return height
+
+def infer(title, audio_in, image_in):
+    # Load the audio file
+    audio_path = audio_in
+    audio_data, sr = librosa.load(audio_path)
+
+    # Get the duration in seconds
+    duration = librosa.get_duration(y=audio_data, sr=sr)
+
+    # Extract the audio data for the desired time
+    start_time = 0 # start time in seconds
+    end_time = duration # end time in seconds
+
+    start_index = int(start_time * sr)
+    end_index = int(end_time * sr)
+
+    audio_data = audio_data[start_index:end_index]
+
+    # Compute the short-time Fourier transform
+    hop_length = 512
+
+
+    stft = librosa.stft(audio_data, hop_length=hop_length)
+    spectrogram = librosa.amplitude_to_db(np.abs(stft), ref=np.max)
+
+    # Get the frequency values
+    freqs = librosa.fft_frequencies(sr=sr, n_fft=stft.shape[0])
+
+    # Select the indices of the frequency values that correspond to the desired frequencies
+    n_freqs = 114
+    freq_indices = np.linspace(0, len(freqs) - 1, n_freqs, dtype=int)
+
+    # Extract the dB values for the desired frequencies
+    db_values = []
+    for i in range(spectrogram.shape[1]):
+        db_values.append(list(zip(freqs[freq_indices], spectrogram[freq_indices, i])))
+
+    # Print the dB values for the first time frame
+    print(db_values[0])
+
+    proportional_values = []
+
+    for frame in db_values:
+        proportional_frame = [db_to_height(db) for f, db in frame]
+        proportional_values.append(proportional_frame)
+
+    print(proportional_values[0])
+    print("AUDIO CHUNK: " + str(len(proportional_values)))
+
+    # Open the background image
+    background_image = Image.open(image_in)
+
+    # Resize the image while keeping its aspect ratio
+    bg_width, bg_height = background_image.size
+    aspect_ratio = bg_width / bg_height
+    new_width = 512
+    new_height = int(new_width / aspect_ratio)
+    resized_bg = background_image.resize((new_width, new_height))
+
+    # Apply black cache for better visibility of the white text
+    bg_cache = Image.open('black_cache.png')
+    resized_bg.paste(bg_cache, (0, resized_bg.height - bg_cache.height), mask=bg_cache)
+
+    # Create a new ImageDraw object
+    draw = ImageDraw.Draw(resized_bg)
+
+    # Define the text to be added
+    text = title
+    font = ImageFont.truetype("Lato-Regular.ttf", 16)
+    text_color = (255, 255, 255) # white color
+
+    # Calculate the position of the text
+    text_width, text_height = draw.textsize(text, font=font)
+    x = 30
+    y = new_height - 70
+
+    # Draw the text on the image
+    draw.text((x, y), text, fill=text_color, font=font)
+
+    # Save the resized image
+    resized_bg.save('resized_background.jpg')
+
+    generated_frames = []
+    for i, frame in enumerate(proportional_values):
+        bars_img = make_bars_image(frame, i, new_height)
+        bars_img = Image.open(bars_img)
+        # Paste the audio bars image on top of the background image
+        fresh_bg = Image.open('resized_background.jpg')
+        fresh_bg.paste(bars_img, (0, 0), mask=bars_img)
+        # Save the image
+        fresh_bg.save('audio_bars_with_bg' + str(i) + '.jpg')
+        generated_frames.append('audio_bars_with_bg' + str(i) + '.jpg')
+    print(generated_frames)
+
+    # Create a video clip from the images
+    clip = ImageSequenceClip(generated_frames, fps=len(generated_frames)/(end_time-start_time))
+    audio_clip = AudioFileClip(audio_in)
+    clip = clip.set_audio(audio_clip)
+    # Set the output codec
+    codec = 'libx264'
+    audio_codec = 'aac'
+    # Save the video to a file
+    clip.write_videofile("my_video.mp4", codec=codec, audio_codec=audio_codec)
+
+    retimed_clip = VideoFileClip("my_video.mp4")
+
+    # Set the desired frame rate
+    new_fps = 25
+
+    # Create a new clip with the new frame rate
+    new_clip = retimed_clip.set_fps(new_fps)
+
+    # Save the new clip as a new video file
+    new_clip.write_videofile("my_video_retimed.mp4", codec=codec, audio_codec=audio_codec)
+
+    return "my_video_retimed.mp4"
+
 # Command line test
 def command_line_test():
     command = "df -h /home/user/app"
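As a quick reference for the scaling in db_to_height above: librosa.amplitude_to_db with ref=np.max clips values to roughly -80 dB (floor) through 0 dB (peak), which the helper maps onto bar heights of 0 to 50 pixels. A small sanity check, assuming it is run in the same module:

    # (db + 80) / 80 * 50  =>  -80 dB -> 0 px, -40 dB -> 25 px, 0 dB -> 50 px
    assert db_to_height(-80) == 0
    assert db_to_height(-40) == 25
    assert db_to_height(0) == 50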
 
@@ -408,6 +593,20 @@ with gr.Blocks(theme=theme, css=css) as App:
         new_song = gr.Audio(label="Full song", type="filepath")
         vc_audio_submit.click(fn=voice_changer, inputs=[vc_audio_input, vc_model_index, vc_pitch_adjust, vc_f0_method, vc_feat_ratio], outputs=[vc_audio_output, vc_audio_message], show_progress=True, queue=True)
         full_song.click(fn=mix, inputs=[vc_audio_output, as_audio_no_vocals], outputs=[new_song])
+
+
+    with gr.Tab("📺 - 音乐视频"):  # "Music video" tab
+        with gr.Row():
+            with gr.Column():
+                inp1 = gr.Textbox(label="为视频配上精彩的文案吧(选填)")  # "Add a caption for your video (optional)"
+                inp2 = new_song
+                inp3 = gr.Image(source='upload', type='filepath', label="上传一张背景图片吧")  # "Upload a background image"
+                btn = gr.Button("生成您的专属音乐视频吧")  # "Generate your own music video"
+
+            with gr.Column():
+                out1 = gr.Video(label='您的专属音乐视频')  # "Your music video"
+                btn.click(fn=infer, inputs=[inp1, inp2, inp3], outputs=[out1])
+
     gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
     gr.HTML('''
     <div class="footer">
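A minimal way to exercise the new video path outside the Gradio tab is sketched below; the audio and image file names are placeholders, and it assumes black_cache.png and Lato-Regular.ttf are available in the working directory, as infer expects:

    # Hypothetical smoke test for infer(); file names below are placeholders.
    if __name__ == "__main__":
        video_path = infer("My song", "song.mp3", "cover.jpg")
        print("Video written to:", video_path)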