artificialguybr committed on
Commit
80b43a8
1 Parent(s): e7c3f3f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -25
app.py CHANGED
@@ -2,6 +2,7 @@ import tempfile
2
  import gradio as gr
3
  import subprocess
4
  import os, stat
 
5
  from googletrans import Translator
6
  from TTS.api import TTS
7
  import ffmpeg
@@ -13,8 +14,6 @@ import numpy as np
13
  import librosa
14
  from zipfile import ZipFile
15
  import shlex
16
- import librosa
17
- import numpy as np
18
  import cv2
19
  import torch
20
  import torchvision
@@ -28,39 +27,46 @@ st = os.stat('ffmpeg')
28
  os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
29
 
30
  def process_video(video, high_quality, target_language):
31
- output_filename = "resized_video.mp4"
 
 
 
 
 
 
 
 
32
  if high_quality:
33
  ffmpeg.input(video).output(output_filename, vf='scale=-1:720').run()
34
  video_path = output_filename
35
  else:
36
  video_path = video
37
 
38
- # Debugging Step 1: Check if video_path exists
39
  if not os.path.exists(video_path):
40
  return f"Error: {video_path} does not exist."
41
 
42
- ffmpeg.input(video_path).output('output_audio.wav', acodec='pcm_s24le', ar=48000, map='a').run()
43
 
44
- y, sr = sf.read("output_audio.wav")
45
  y = y.astype(np.float32)
46
  y_denoised = wiener(y)
47
- sf.write("output_audio_denoised.wav", y_denoised, sr)
48
 
49
- sound = AudioSegment.from_file("output_audio_denoised.wav", format="wav")
50
- sound = sound.apply_gain(0) # Reduce gain by 5 dB
51
  sound = sound.low_pass_filter(3000).high_pass_filter(100)
52
- sound.export("output_audio_processed.wav", format="wav")
53
 
54
- shell_command = f"ffmpeg -y -i output_audio_processed.wav -af lowpass=3000,highpass=100 output_audio_final.wav".split(" ")
55
  subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
56
 
57
  model = whisper.load_model("base")
58
- result = model.transcribe("output_audio_final.wav")
59
  whisper_text = result["text"]
60
  whisper_language = result['language']
61
-
62
  print(whisper_text)
63
-
64
  language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
65
  target_language_code = language_mapping[target_language]
66
  translator = Translator()
@@ -71,11 +77,9 @@ def process_video(video, high_quality, target_language):
71
  print("Failed to translate text. Likely an issue with token extraction in the Google Translate API.")
72
  translated_text = "Translation failed due to API issue."
73
 
74
-
75
-
76
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
77
- tts.to('cuda') # Replacing deprecated gpu=True
78
- tts.tts_to_file(translated_text, speaker_wav='output_audio_final.wav', file_path="output_synth.wav", language=target_language_code)
79
 
80
  pad_top = 0
81
  pad_bottom = 15
@@ -83,15 +87,33 @@ def process_video(video, high_quality, target_language):
83
  pad_right = 0
84
  rescaleFactor = 1
85
 
86
- # Debugging Step 2: Remove quotes around the video path
87
  video_path_fix = video_path
88
 
89
- cmd = f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face {shlex.quote(video_path_fix)} --audio 'output_synth.wav' --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} --nosmooth --outfile 'output_video.mp4'"
90
  subprocess.run(cmd, shell=True)
91
- # Debugging Step 3: Check if output video exists
92
- if not os.path.exists("output_video.mp4"):
93
- raise FileNotFoundError("Error: output_video.mp4 was not generated.")
94
- return "output_video.mp4"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
  iface = gr.Interface(
97
  fn=process_video,
@@ -100,7 +122,7 @@ iface = gr.Interface(
100
  gr.inputs.Checkbox(label="High Quality"),
101
  gr.inputs.Dropdown(choices=["English", "Spanish", "French", "German", "Italian", "Portuguese", "Polish", "Turkish", "Russian", "Dutch", "Czech", "Arabic", "Chinese (Simplified)"], label="Target Language for Dubbing")
102
  ],
103
- outputs=gr.outputs.File(),
104
  live=False
105
  )
106
 
 
2
  import gradio as gr
3
  import subprocess
4
  import os, stat
5
+ import uuid
6
  from googletrans import Translator
7
  from TTS.api import TTS
8
  import ffmpeg
 
14
  import librosa
15
  from zipfile import ZipFile
16
  import shlex
 
 
17
  import cv2
18
  import torch
19
  import torchvision
 
27
  os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)
28
 
29
  def process_video(video, high_quality, target_language):
30
+ # Check video duration
31
+ video_info = ffmpeg.probe(video)
32
+ video_duration = float(video_info['streams'][0]['duration'])
33
+ if video_duration > 90:
34
+ return gr.Interface.Warnings("Video duration exceeds 1 minute and 30 seconds. Please upload a shorter video.")
35
+
36
+ run_uuid = uuid.uuid4().hex[:6]
37
+ output_filename = f"{run_uuid}_resized_video.mp4"
38
+
39
  if high_quality:
40
  ffmpeg.input(video).output(output_filename, vf='scale=-1:720').run()
41
  video_path = output_filename
42
  else:
43
  video_path = video
44
 
 
45
  if not os.path.exists(video_path):
46
  return f"Error: {video_path} does not exist."
47
 
48
+ ffmpeg.input(video_path).output(f"{run_uuid}_output_audio.wav", acodec='pcm_s24le', ar=48000, map='a').run()
49
 
50
+ y, sr = sf.read(f"{run_uuid}_output_audio.wav")
51
  y = y.astype(np.float32)
52
  y_denoised = wiener(y)
53
+ sf.write(f"{run_uuid}_output_audio_denoised.wav", y_denoised, sr)
54
 
55
+ sound = AudioSegment.from_file(f"{run_uuid}_output_audio_denoised.wav", format="wav")
56
+ sound = sound.apply_gain(0)
57
  sound = sound.low_pass_filter(3000).high_pass_filter(100)
58
+ sound.export(f"{run_uuid}_output_audio_processed.wav", format="wav")
59
 
60
+ shell_command = f"ffmpeg -y -i {run_uuid}_output_audio_processed.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
61
  subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
62
 
63
  model = whisper.load_model("base")
64
+ result = model.transcribe(f"{run_uuid}_output_audio_final.wav")
65
  whisper_text = result["text"]
66
  whisper_language = result['language']
67
+
68
  print(whisper_text)
69
+
70
  language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
71
  target_language_code = language_mapping[target_language]
72
  translator = Translator()
 
77
  print("Failed to translate text. Likely an issue with token extraction in the Google Translate API.")
78
  translated_text = "Translation failed due to API issue."
79
 
 
 
80
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
81
+ tts.to('cuda')
82
+ tts.tts_to_file(translated_text, speaker_wav=f"{run_uuid}_output_audio_final.wav", file_path=f"{run_uuid}_output_synth.wav", language=target_language_code)
83
 
84
  pad_top = 0
85
  pad_bottom = 15
 
87
  pad_right = 0
88
  rescaleFactor = 1
89
 
 
90
  video_path_fix = video_path
91
 
92
+ cmd = f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face {shlex.quote(video_path_fix)} --audio '{run_uuid}_output_synth.wav' --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} --nosmooth --outfile '{run_uuid}_output_video.mp4'"
93
  subprocess.run(cmd, shell=True)
94
+
95
+ if not os.path.exists(f"{run_uuid}_output_video.mp4"):
96
+ raise FileNotFoundError(f"Error: {run_uuid}_output_video.mp4 was not generated.")
97
+
98
+ output_video_path = f"{run_uuid}_output_video.mp4"
99
+
100
+ # Cleanup: Delete all generated files except the final output video
101
+ files_to_delete = [
102
+ f"{run_uuid}_resized_video.mp4",
103
+ f"{run_uuid}_output_audio.wav",
104
+ f"{run_uuid}_output_audio_denoised.wav",
105
+ f"{run_uuid}_output_audio_processed.wav",
106
+ f"{run_uuid}_output_audio_final.wav",
107
+ f"{run_uuid}_output_synth.wav"
108
+ ]
109
+
110
+ for file in files_to_delete:
111
+ try:
112
+ os.remove(file)
113
+ except FileNotFoundError:
114
+ print(f"File {file} not found for deletion.")
115
+
116
+ return output_video_path
117
 
118
  iface = gr.Interface(
119
  fn=process_video,
 
122
  gr.inputs.Checkbox(label="High Quality"),
123
  gr.inputs.Dropdown(choices=["English", "Spanish", "French", "German", "Italian", "Portuguese", "Polish", "Turkish", "Russian", "Dutch", "Czech", "Arabic", "Chinese (Simplified)"], label="Target Language for Dubbing")
124
  ],
125
+ outputs=gr.outputs.Video(),
126
  live=False
127
  )
128