Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -226,100 +226,103 @@ def generate_video():
|
|
226 |
global TEMP_DIR
|
227 |
TEMP_DIR = create_temp_dir()
|
228 |
print('request:',request.method)
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
if
|
262 |
-
|
263 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
264 |
set_api_key("92e149985ea2732b4359c74346c3daee")
|
|
|
|
|
265 |
|
266 |
-
audio = generate(text = text_prompt, voice =
|
267 |
-
with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="
|
268 |
for chunk in audio:
|
269 |
temp_file.write(chunk)
|
270 |
driven_audio_path = temp_file.name
|
271 |
print('driven_audio_path',driven_audio_path)
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
user_voice = request.files['user_voice']
|
291 |
-
|
292 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", prefix="user_voice_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
293 |
-
user_voice_path = temp_file.name
|
294 |
-
user_voice.save(user_voice_path)
|
295 |
-
print('user_voice_path',user_voice_path)
|
296 |
-
|
297 |
-
set_api_key("92e149985ea2732b4359c74346c3daee")
|
298 |
-
voice = clone(name = "User Cloned Voice",
|
299 |
-
files = [user_voice_path] )
|
300 |
-
|
301 |
-
audio = generate(text = text_prompt, voice = voice, model = "eleven_multilingual_v2",stream=True, latency=4)
|
302 |
-
with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="cloned_audio_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
303 |
-
for chunk in audio:
|
304 |
-
temp_file.write(chunk)
|
305 |
-
driven_audio_path = temp_file.name
|
306 |
-
print('driven_audio_path',driven_audio_path)
|
307 |
-
|
308 |
-
# elevenlabs.save(audio, driven_audio_path)
|
309 |
-
|
310 |
-
save_dir = tempfile.mkdtemp(dir=TEMP_DIR.name)
|
311 |
-
result_folder = os.path.join(save_dir, "results")
|
312 |
-
os.makedirs(result_folder, exist_ok=True)
|
313 |
-
|
314 |
-
ref_pose_video_path = None
|
315 |
-
if ref_pose_video:
|
316 |
-
with tempfile.NamedTemporaryFile(suffix=".mp4", prefix="ref_pose_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
317 |
-
ref_pose_video_path = temp_file.name
|
318 |
-
ref_pose_video.save(ref_pose_video_path)
|
319 |
-
print('ref_pose_video_path',ref_pose_video_path)
|
320 |
-
|
321 |
-
print("driven_audio_path: ")
|
322 |
-
print(driven_audio_path)
|
323 |
# Example of using the class with some hypothetical paths
|
324 |
args = AnimationConfig(driven_audio_path=driven_audio_path, source_image_path=source_image_path, result_folder=result_folder, pose_style=pose_style, expression_scale=expression_scale, enhancer=enhancer,still=still,preprocess=preprocess,ref_pose_video_path=ref_pose_video_path)
|
325 |
|
|
|
226 |
global TEMP_DIR
|
227 |
TEMP_DIR = create_temp_dir()
|
228 |
print('request:',request.method)
|
229 |
+
try:
|
230 |
+
if request.method == 'POST':
|
231 |
+
source_image = request.files['source_image']
|
232 |
+
text_prompt = request.form['text_prompt']
|
233 |
+
print('Input text prompt: ',text_prompt)
|
234 |
+
voice_cloning = request.form.get('voice_cloning', 'no')
|
235 |
+
target_language = request.form.get('target_language', 'original_text')
|
236 |
+
print('target_language',target_language)
|
237 |
+
pose_style = int(request.form.get('pose_style', 1))
|
238 |
+
expression_scale = float(request.form.get('expression_scale', 1))
|
239 |
+
enhancer = request.form.get('enhancer', None)
|
240 |
+
voice_gender = request.form.get('voice_gender', 'male')
|
241 |
+
still_str = request.form.get('still', 'False')
|
242 |
+
still = still_str.lower() == 'true'
|
243 |
+
print('still', still)
|
244 |
+
preprocess = request.form.get('preprocess', 'crop')
|
245 |
+
print('preprocess selected: ',preprocess)
|
246 |
+
ref_pose_video = request.files.get('ref_pose', None)
|
247 |
+
|
248 |
+
if target_language != 'original_text':
|
249 |
+
response = translate_text(text_prompt, target_language)
|
250 |
+
# response = await translate_text_async(text_prompt, target_language)
|
251 |
+
text_prompt = response.choices[0].message.content.strip()
|
252 |
+
|
253 |
+
app.config['text_prompt'] = text_prompt
|
254 |
+
print('Final text prompt: ',text_prompt)
|
255 |
+
|
256 |
+
source_image_path = save_uploaded_file(source_image, 'source_image.png',TEMP_DIR)
|
257 |
+
print(source_image_path)
|
258 |
+
|
259 |
+
# driven_audio_path = await voice_cloning_async(voice_cloning, voice_gender, text_prompt, user_voice)
|
260 |
+
|
261 |
+
if voice_cloning == 'no':
|
262 |
+
if voice_gender == 'male':
|
263 |
+
voice = 'echo'
|
264 |
+
print('Entering Audio creation using elevenlabs')
|
265 |
+
set_api_key("92e149985ea2732b4359c74346c3daee")
|
266 |
+
|
267 |
+
audio = generate(text = text_prompt, voice = "Daniel", model = "eleven_multilingual_v2",stream=True, latency=4)
|
268 |
+
with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
269 |
+
for chunk in audio:
|
270 |
+
temp_file.write(chunk)
|
271 |
+
driven_audio_path = temp_file.name
|
272 |
+
print('driven_audio_path',driven_audio_path)
|
273 |
+
print('Audio file saved using elevenlabs')
|
274 |
+
|
275 |
+
else:
|
276 |
+
voice = 'nova'
|
277 |
+
|
278 |
+
print('Entering Audio creation using whisper')
|
279 |
+
response = client.audio.speech.create(model="tts-1-hd",
|
280 |
+
voice=voice,
|
281 |
+
input = text_prompt)
|
282 |
+
|
283 |
+
print('Audio created using whisper')
|
284 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", prefix="text_to_speech_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
285 |
+
driven_audio_path = temp_file.name
|
286 |
+
|
287 |
+
response.write_to_file(driven_audio_path)
|
288 |
+
print('Audio file saved using whisper')
|
289 |
+
|
290 |
+
elif voice_cloning == 'yes':
|
291 |
+
user_voice = request.files['user_voice']
|
292 |
+
|
293 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", prefix="user_voice_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
294 |
+
user_voice_path = temp_file.name
|
295 |
+
user_voice.save(user_voice_path)
|
296 |
+
print('user_voice_path',user_voice_path)
|
297 |
+
|
298 |
set_api_key("92e149985ea2732b4359c74346c3daee")
|
299 |
+
voice = clone(name = "User Cloned Voice",
|
300 |
+
files = [user_voice_path] )
|
301 |
|
302 |
+
audio = generate(text = text_prompt, voice = voice, model = "eleven_multilingual_v2",stream=True, latency=4)
|
303 |
+
with tempfile.NamedTemporaryFile(suffix=".mp3", prefix="cloned_audio_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
304 |
for chunk in audio:
|
305 |
temp_file.write(chunk)
|
306 |
driven_audio_path = temp_file.name
|
307 |
print('driven_audio_path',driven_audio_path)
|
308 |
+
|
309 |
+
# elevenlabs.save(audio, driven_audio_path)
|
310 |
+
|
311 |
+
save_dir = tempfile.mkdtemp(dir=TEMP_DIR.name)
|
312 |
+
result_folder = os.path.join(save_dir, "results")
|
313 |
+
os.makedirs(result_folder, exist_ok=True)
|
314 |
+
|
315 |
+
ref_pose_video_path = None
|
316 |
+
if ref_pose_video:
|
317 |
+
with tempfile.NamedTemporaryFile(suffix=".mp4", prefix="ref_pose_",dir=TEMP_DIR.name, delete=False) as temp_file:
|
318 |
+
ref_pose_video_path = temp_file.name
|
319 |
+
ref_pose_video.save(ref_pose_video_path)
|
320 |
+
print('ref_pose_video_path',ref_pose_video_path)
|
321 |
+
|
322 |
+
except Exception as e:
|
323 |
+
app.logger.error(f"An error occurred: {e}")
|
324 |
+
return "An error occurred", 500
|
325 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
326 |
# Example of using the class with some hypothetical paths
|
327 |
args = AnimationConfig(driven_audio_path=driven_audio_path, source_image_path=source_image_path, result_folder=result_folder, pose_style=pose_style, expression_scale=expression_scale, enhancer=enhancer,still=still,preprocess=preprocess,ref_pose_video_path=ref_pose_video_path)
|
328 |
|