LivePortrait

Running on Zero

App Files Files Community

yerang committed on Oct 7

Commit

d98c79a

•

1 Parent(s): a531730

add files

Browse files

Files changed (1) hide show

elevenlabs_utils.py +175 -0

elevenlabs_utils.py ADDED Viewed

	@@ -0,0 +1,175 @@

+import os
+from io import BytesIO
+from typing import IO, Optional
+import time
+import uuid
+from pathlib import Path
+from pydub import AudioSegment
+import gradio as gr
+from elevenlabs import Voice, VoiceSettings, save
+from elevenlabs.client import ElevenLabs
+def generate_random_filename(parent, extension="txt"):
+    """
+    Builds a unique file path inside ``parent`` from a UUID and the current timestamp.
+    Args:
+        parent (str): Directory the generated filename is joined onto. It is neither
+            created nor checked here.
+        extension (str): The file extension, with or without a leading dot.
+            Default is 'txt'. An empty value produces a name with no extension.
+    Returns:
+        str: The joined path ``<parent>/<uuid4>_<unix-seconds>[.<extension>]``.
+    """
+    # Generate a random UUID and take the current time in whole seconds
+    random_uuid = uuid.uuid4()
+    timestamp = int(time.time())
+    # Accept both "mp3" and ".mp3" so the result never contains "..mp3"
+    suffix = (extension or "").lstrip(".")
+    filename = f"{random_uuid}_{timestamp}"
+    if suffix:
+        filename = f"{filename}.{suffix}"
+    return os.path.join(parent, filename)
+# Model identifier used for speech generation; the ELEVEN_LABS_MODEL environment
+# variable overrides the built-in default.
+ELEVEN_LABS_MODEL = os.environ.get("ELEVEN_LABS_MODEL", "eleven_multilingual_v2")
+# Language names exposed by this module, kept in their original order.
+ELEVEN_LABS_LANGUAGE_SUPPORTS = [
+    "English", "Chinese", "Spanish", "Hindi", "Portuguese",
+    "French", "German", "Japanese", "Arabic", "Korean",
+    "Indonesian", "Italian", "Dutch", "Turkish", "Polish",
+    "Swedish", "Filipino", "Malay", "Russian", "Romanian",
+    "Ukrainian", "Greek", "Czech", "Danish", "Finnish",
+    "Bulgarian", "Croatian", "Slovak", "Tamil",
+]
+class ElevenLabsPipeline:
+    """
+    Wrapper around the ElevenLabs client for cloning a voice and generating speech.
+    Generated audio is exported as MP3 files under ``TMP_DIR``.
+    """
+    # Directory that receives generated MP3 files (created in ``__init__``).
+    TMP_DIR = "./tmp"
+    # Voice name used when the caller supplies no reference audio.
+    DEFAULT_VOICE_NAME = "Laura"
+    def __init__(self):
+        """
+        Creates the API client and makes sure the output directory exists.
+        Raises:
+            RuntimeError: If the ELEVENLABS_API_KEY environment variable is unset or empty.
+        """
+        # The key must come from the environment only: a literal fallback commits a
+        # secret to the repository and makes the missing-key check unreachable.
+        eleven_labs_api_key = os.getenv("ELEVENLABS_API_KEY")
+        if not eleven_labs_api_key:
+            raise RuntimeError("ELEVENLABS_API_KEY 환경변수를 설정해주세요.")
+        self.client = ElevenLabs(api_key=eleven_labs_api_key)
+        os.makedirs(self.TMP_DIR, exist_ok=True)
+    def clone_voice(self, audio, name, description=None):
+        """
+        Clones a voice from ``audio`` unless a voice called ``name`` already exists.
+        Args:
+            audio: Reference audio, passed to the client as the only sample file.
+            name: Name to register the cloned voice under.
+            description: Optional description forwarded to the client.
+        Returns:
+            str: A status message. If the clone call raises, the exception text is
+                returned instead of being propagated.
+        """
+        if self._get_voice(name) is not None:
+            return "존재하는 음성입니다. 음성 생성을 시작해주세요."
+        try:
+            self.client.clone(
+                name=name,
+                description=description,  # Optional
+                files=[audio],
+            )
+        except Exception as e:
+            # Best effort by design: the failure is reported as a message.
+            return str(e)
+        return "Voice Clone을 성공적으로 생성했습니다."
+    def _get_voice(self, name: str):
+        """Returns the first voice listed by the client whose name equals ``name``, or None."""
+        response = self.client.voices.get_all()
+        return next((voice for voice in response.voices if voice.name == name), None)
+    def generate_voice(
+        self,
+        text: str,
+        audio: Optional[str] = None,
+        language: str = "ko",
+        mute_before_ms: Optional[int] = 0,
+        mute_after_ms: Optional[int] = 0,
+        stability: float = 0.5,
+        similarity_boost: float = 0.75,
+        style: float = 0.0,
+        use_speaker_boost=True,
+    ) -> str:
+        """
+        Generates speech for ``text`` and saves it as an MP3 with optional silence padding.
+        Args:
+            text: Text to synthesize.
+            audio: Path to reference audio. Its file stem becomes the voice name and a
+                clone is attempted first. When None, ``DEFAULT_VOICE_NAME`` is used.
+            language: Forwarded to ``VoiceSettings`` unchanged.
+            mute_before_ms: Milliseconds of silence prepended; None is treated as 0.
+            mute_after_ms: Milliseconds of silence appended; None is treated as 0.
+            stability: Forwarded to ``VoiceSettings``.
+            similarity_boost: Forwarded to ``VoiceSettings``.
+            style: Forwarded to ``VoiceSettings``.
+            use_speaker_boost: Forwarded to ``VoiceSettings``.
+        Returns:
+            str: Path of the exported MP3 file under ``TMP_DIR``.
+        Raises:
+            ValueError: If no voice with the resolved name can be found.
+        """
+        clone_status = ""
+        if audio is not None:
+            name = Path(audio).stem
+            clone_status = self.clone_voice(audio, name)
+        else:
+            gr.Info("음성이 안주어졌습니다. 기본 음성으로 생성하겠습니다.", duration=2)
+            name = self.DEFAULT_VOICE_NAME
+        current_voice = self._get_voice(name)
+        if current_voice is None:
+            # One repeat lookup, as the original code did.
+            current_voice = self._get_voice(name)
+        if current_voice is None:
+            # Fail with the clone status instead of an AttributeError on None.
+            raise ValueError(f"Voice '{name}' was not found. {clone_status}".strip())
+        # NOTE(review): ``language`` is passed to VoiceSettings exactly as before;
+        # confirm the installed SDK version accepts that field.
+        response = self.client.generate(
+            text=text,
+            model=ELEVEN_LABS_MODEL,
+            voice=Voice(
+                voice_id=current_voice.voice_id,
+                settings=VoiceSettings(
+                    stability=stability,
+                    similarity_boost=similarity_boost,
+                    style=style,
+                    use_speaker_boost=use_speaker_boost,
+                    language=language,
+                ),
+            ),
+        )
+        # Collect the non-empty audio chunks into an in-memory stream
+        audio_stream = BytesIO()
+        for chunk in response:
+            if chunk:
+                audio_stream.write(chunk)
+        # Reset stream position to the beginning before decoding
+        audio_stream.seek(0)
+        audio_segment = AudioSegment.from_file(audio_stream, format="mp3")
+        # The padding arguments are typed Optional, so map None to no silence
+        mute_before = AudioSegment.silent(duration=mute_before_ms or 0)
+        mute_after = AudioSegment.silent(duration=mute_after_ms or 0)
+        combined_segment = mute_before + audio_segment + mute_after
+        tmp_file = generate_random_filename(self.TMP_DIR, "mp3")
+        # Export the combined audio to the generated file path
+        combined_segment.export(tmp_file, format="mp3", bitrate="128k")
+        return tmp_file