Spaces:

BoldActionMan
/

Video-Translator-with-Voice-Cloning-and-Subtitles

Running

App Files Community

BoldActionMan committed on Aug 28, 2024

Commit

7ce119a

·

verified ·

1 Parent(s): c00e8d2

Update app.py

Files changed (1) hide show

app.py +5 -4

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ import torch
 from openvoice import se_extractor
 from openvoice.api import ToneColorConverter
 import whisper
-from moviepy import *
 from pydub import AudioSegment
 from df.enhance import enhance, init_df, load_audio, save_audio
 import translators as ts
@@ -52,7 +52,7 @@ def process_video(video_file, language_choice):
     audio = AudioSegment.from_file(reference_audio)
     resampled_audio = audio.set_frame_rate(48000)
     resampled_audio.export(reference_audio, format="wav")
-    reference_audio = AudioClip(reference_audio)
     # Enhance the audio
     model, df_state, _ = init_df()
@@ -159,8 +159,9 @@ def process_video(video_file, language_choice):
                 segment_path = os.path.join(output_dir, f'segment_{start}_{end}.wav')
                 model.tts_to_file(translated_text, speaker_id, segment_path, speed=speed)
-                reference_speaker = reference_audio.subclip(int(start), int(end))  # This is the voice you want to clone [int(start):int(end)]
-                target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad=False)
                 # Run the tone color converter
                 encode_message = "@MyShell"
                 tone_color_converter.convert(

 from openvoice import se_extractor
 from openvoice.api import ToneColorConverter
 import whisper
+from moviepy.editor import *
 from pydub import AudioSegment
 from df.enhance import enhance, init_df, load_audio, save_audio
 import translators as ts
     audio = AudioSegment.from_file(reference_audio)
     resampled_audio = audio.set_frame_rate(48000)
     resampled_audio.export(reference_audio, format="wav")
+    audio_clip = AudioFileClip(reference_audio)
     # Enhance the audio
     model, df_state, _ = init_df()
                 segment_path = os.path.join(output_dir, f'segment_{start}_{end}.wav')
                 model.tts_to_file(translated_text, speaker_id, segment_path, speed=speed)
+                reference_speaker = AudioFileClip.subclip(audio_clip, int(start), int(end))  # This is the voice you want to clone
+                reference_speaker.write_audiofile("reference_speaker.wav")
+                target_se, audio_name = se_extractor.get_se("reference_speaker.wav", tone_color_converter, vad=False)
                 # Run the tone color converter
                 encode_message = "@MyShell"
                 tone_color_converter.convert(