Spanicin committed on
Commit
44dcd57
1 Parent(s): 7bb8883

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -88
app.py CHANGED
@@ -226,100 +226,103 @@ def generate_video():
226
  global TEMP_DIR
227
  TEMP_DIR = create_temp_dir()
228
  print('request:',request.method)
229
- if request.method == 'POST':
230
- source_image = request.files['source_image']
231
- text_prompt = request.form['text_prompt']
232
- print('Input text prompt: ',text_prompt)
233
- voice_cloning = request.form.get('voice_cloning', 'no')
234
- target_language = request.form.get('target_language', 'original_text')
235
- print('target_language',target_language)
236
- pose_style = int(request.form.get('pose_style', 1))
237
- expression_scale = float(request.form.get('expression_scale', 1))
238
- enhancer = request.form.get('enhancer', None)
239
- voice_gender = request.form.get('voice_gender', 'male')
240
- still_str = request.form.get('still', 'False')
241
- still = still_str.lower() == 'true'
242
- print('still', still)
243
- preprocess = request.form.get('preprocess', 'crop')
244
- print('preprocess selected: ',preprocess)
245
- ref_pose_video = request.files.get('ref_pose', None)
246
-
247
- if target_language != 'original_text':
248
- response = translate_text(text_prompt, target_language)
249
- # response = await translate_text_async(text_prompt, target_language)
250
- text_prompt = response.choices[0].message.content.strip()
251
-
252
- app.config['text_prompt'] = text_prompt
253
- print('Final text prompt: ',text_prompt)
254
-
255
- source_image_path = save_uploaded_file(source_image, 'source_image.png',TEMP_DIR)
256
- print(source_image_path)
257
-
258
- # driven_audio_path = await voice_cloning_async(voice_cloning, voice_gender, text_prompt, user_voice)
259
-
260
- if voice_cloning == 'no':
261
- if voice_gender == 'male':
262
- voice = 'echo'
263
- print('Entering Audio creation using elevenlabs')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  set_api_key("92e149985ea2732b4359c74346c3daee")
 
 
265
 
266
- audio = generate(text = text_prompt, voice = "Daniel", model = "eleven_multilingual_v2",stream=True, latency=4)
267
- with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
268
  for chunk in audio:
269
  temp_file.write(chunk)
270
  driven_audio_path = temp_file.name
271
  print('driven_audio_path',driven_audio_path)
272
- print('Audio file saved using elevenlabs')
273
-
274
- else:
275
- voice = 'nova'
276
-
277
- print('Entering Audio creation using whisper')
278
- response = client.audio.speech.create(model="tts-1-hd",
279
- voice=voice,
280
- input = text_prompt)
281
-
282
- print('Audio created using whisper')
283
- with tempfile.NamedTemporaryFile(suffix=".wav", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
284
- driven_audio_path = temp_file.name
285
-
286
- response.write_to_file(driven_audio_path)
287
- print('Audio file saved using whisper')
288
-
289
- elif voice_cloning == 'yes':
290
- user_voice = request.files['user_voice']
291
-
292
- with tempfile.NamedTemporaryFile(suffix=".wav", prefix="user_voice_",dir=TEMP_DIR.name, delete=False) as temp_file:
293
- user_voice_path = temp_file.name
294
- user_voice.save(user_voice_path)
295
- print('user_voice_path',user_voice_path)
296
-
297
- set_api_key("92e149985ea2732b4359c74346c3daee")
298
- voice = clone(name = "User Cloned Voice",
299
- files = [user_voice_path] )
300
-
301
- audio = generate(text = text_prompt, voice = voice, model = "eleven_multilingual_v2",stream=True, latency=4)
302
- with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="cloned_audio_",dir=TEMP_DIR.name, delete=False) as temp_file:
303
- for chunk in audio:
304
- temp_file.write(chunk)
305
- driven_audio_path = temp_file.name
306
- print('driven_audio_path',driven_audio_path)
307
-
308
- # elevenlabs.save(audio, driven_audio_path)
309
-
310
- save_dir = tempfile.mkdtemp(dir=TEMP_DIR.name)
311
- result_folder = os.path.join(save_dir, "results")
312
- os.makedirs(result_folder, exist_ok=True)
313
-
314
- ref_pose_video_path = None
315
- if ref_pose_video:
316
- with tempfile.NamedTemporaryFile(suffix=".mp4", prefix="ref_pose_",dir=TEMP_DIR.name, delete=False) as temp_file:
317
- ref_pose_video_path = temp_file.name
318
- ref_pose_video.save(ref_pose_video_path)
319
- print('ref_pose_video_path',ref_pose_video_path)
320
-
321
- print("driven_audio_path: ")
322
- print(driven_audio_path)
323
  # Example of using the class with some hypothetical paths
324
  args = AnimationConfig(driven_audio_path=driven_audio_path, source_image_path=source_image_path, result_folder=result_folder, pose_style=pose_style, expression_scale=expression_scale, enhancer=enhancer,still=still,preprocess=preprocess,ref_pose_video_path=ref_pose_video_path)
325
 
 
226
  global TEMP_DIR
227
  TEMP_DIR = create_temp_dir()
228
  print('request:',request.method)
229
+ try:
230
+ if request.method == 'POST':
231
+ source_image = request.files['source_image']
232
+ text_prompt = request.form['text_prompt']
233
+ print('Input text prompt: ',text_prompt)
234
+ voice_cloning = request.form.get('voice_cloning', 'no')
235
+ target_language = request.form.get('target_language', 'original_text')
236
+ print('target_language',target_language)
237
+ pose_style = int(request.form.get('pose_style', 1))
238
+ expression_scale = float(request.form.get('expression_scale', 1))
239
+ enhancer = request.form.get('enhancer', None)
240
+ voice_gender = request.form.get('voice_gender', 'male')
241
+ still_str = request.form.get('still', 'False')
242
+ still = still_str.lower() == 'true'
243
+ print('still', still)
244
+ preprocess = request.form.get('preprocess', 'crop')
245
+ print('preprocess selected: ',preprocess)
246
+ ref_pose_video = request.files.get('ref_pose', None)
247
+
248
+ if target_language != 'original_text':
249
+ response = translate_text(text_prompt, target_language)
250
+ # response = await translate_text_async(text_prompt, target_language)
251
+ text_prompt = response.choices[0].message.content.strip()
252
+
253
+ app.config['text_prompt'] = text_prompt
254
+ print('Final text prompt: ',text_prompt)
255
+
256
+ source_image_path = save_uploaded_file(source_image, 'source_image.png',TEMP_DIR)
257
+ print(source_image_path)
258
+
259
+ # driven_audio_path = await voice_cloning_async(voice_cloning, voice_gender, text_prompt, user_voice)
260
+
261
+ if voice_cloning == 'no':
262
+ if voice_gender == 'male':
263
+ voice = 'echo'
264
+ print('Entering Audio creation using elevenlabs')
265
+ set_api_key("92e149985ea2732b4359c74346c3daee")
266
+
267
+ audio = generate(text = text_prompt, voice = "Daniel", model = "eleven_multilingual_v2",stream=True, latency=4)
268
+ with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
269
+ for chunk in audio:
270
+ temp_file.write(chunk)
271
+ driven_audio_path = temp_file.name
272
+ print('driven_audio_path',driven_audio_path)
273
+ print('Audio file saved using elevenlabs')
274
+
275
+ else:
276
+ voice = 'nova'
277
+
278
+ print('Entering Audio creation using whisper')
279
+ response = client.audio.speech.create(model="tts-1-hd",
280
+ voice=voice,
281
+ input = text_prompt)
282
+
283
+ print('Audio created using whisper')
284
+ with tempfile.NamedTemporaryFile(suffix=".wav", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
285
+ driven_audio_path = temp_file.name
286
+
287
+ response.write_to_file(driven_audio_path)
288
+ print('Audio file saved using whisper')
289
+
290
+ elif voice_cloning == 'yes':
291
+ user_voice = request.files['user_voice']
292
+
293
+ with tempfile.NamedTemporaryFile(suffix=".wav", prefix="user_voice_",dir=TEMP_DIR.name, delete=False) as temp_file:
294
+ user_voice_path = temp_file.name
295
+ user_voice.save(user_voice_path)
296
+ print('user_voice_path',user_voice_path)
297
+
298
  set_api_key("92e149985ea2732b4359c74346c3daee")
299
+ voice = clone(name = "User Cloned Voice",
300
+ files = [user_voice_path] )
301
 
302
+ audio = generate(text = text_prompt, voice = voice, model = "eleven_multilingual_v2",stream=True, latency=4)
303
+ with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="cloned_audio_",dir=TEMP_DIR.name, delete=False) as temp_file:
304
  for chunk in audio:
305
  temp_file.write(chunk)
306
  driven_audio_path = temp_file.name
307
  print('driven_audio_path',driven_audio_path)
308
+
309
+ # elevenlabs.save(audio, driven_audio_path)
310
+
311
+ save_dir = tempfile.mkdtemp(dir=TEMP_DIR.name)
312
+ result_folder = os.path.join(save_dir, "results")
313
+ os.makedirs(result_folder, exist_ok=True)
314
+
315
+ ref_pose_video_path = None
316
+ if ref_pose_video:
317
+ with tempfile.NamedTemporaryFile(suffix=".mp4", prefix="ref_pose_",dir=TEMP_DIR.name, delete=False) as temp_file:
318
+ ref_pose_video_path = temp_file.name
319
+ ref_pose_video.save(ref_pose_video_path)
320
+ print('ref_pose_video_path',ref_pose_video_path)
321
+
322
+ except Exception as e:
323
+ app.logger.error(f"An error occurred: {e}")
324
+ return "An error occurred", 500
325
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
326
  # Example of using the class with some hypothetical paths
327
  args = AnimationConfig(driven_audio_path=driven_audio_path, source_image_path=source_image_path, result_folder=result_folder, pose_style=pose_style, expression_scale=expression_scale, enhancer=enhancer,still=still,preprocess=preprocess,ref_pose_video_path=ref_pose_video_path)
328