LivePortrait

Running on Zero

LivePortrait / elevenlabs_utils.py

yerang

add files

d98c79a about 1 month ago

4.98 kB

	import os
	from io import BytesIO
	from typing import IO, Optional
	import time
	import uuid
	from pathlib import Path

	from pydub import AudioSegment
	import gradio as gr
	from elevenlabs import Voice, VoiceSettings, save
	from elevenlabs.client import ElevenLabs




	def generate_random_filename(parent, extension: str = "txt") -> str:
	    """Build a unique file path inside ``parent``.

	    The file name combines a random UUID4 with the current Unix timestamp
	    (whole seconds). Nothing is created on disk; only the path is returned.

	    Args:
	        parent: Directory the returned path should point into.
	        extension (str): File extension for the generated name, with or
	            without a leading dot. Default is 'txt'.

	    Returns:
	        str: ``parent`` joined with ``<uuid>_<timestamp>.<extension>``.
	    """
	    # Accept both "mp3" and ".mp3" so callers never end up with "..mp3".
	    suffix = extension.lstrip(".")

	    # The UUID provides uniqueness; the timestamp is kept for readability.
	    filename = f"{uuid.uuid4()}_{int(time.time())}.{suffix}"
	    return os.path.join(parent, filename)

	# Model id handed to the ElevenLabs client; the ELEVEN_LABS_MODEL
	# environment variable overrides the default.
	ELEVEN_LABS_MODEL = os.environ.get("ELEVEN_LABS_MODEL", "eleven_multilingual_v2")

	# Language display names (English spelling).
	# NOTE(review): presumably the languages offered for the model above -
	# confirm against the caller, nothing in this file reads the list.
	ELEVEN_LABS_LANGUAGE_SUPPORTS = [
	    "English", "Chinese", "Spanish", "Hindi", "Portuguese",
	    "French", "German", "Japanese", "Arabic", "Korean",
	    "Indonesian", "Italian", "Dutch", "Turkish", "Polish",
	    "Swedish", "Filipino", "Malay", "Russian", "Romanian",
	    "Ukrainian", "Greek", "Czech", "Danish", "Finnish",
	    "Bulgarian", "Croatian", "Slovak", "Tamil",
	]


	class ElevenLabsPipeline:
	    """Wrapper around the ElevenLabs client for voice cloning and speech synthesis.

	    Generated speech is written as MP3 files under ``./tmp``.
	    """

	    # Directory that receives the generated audio files.
	    TMP_DIR = "./tmp"
	    # Voice looked up when the caller supplies no reference audio.
	    DEFAULT_VOICE_NAME = "Laura"

	    def __init__(self):
	        """Create the API client and make sure the output directory exists.

	        Raises:
	            RuntimeError: If the ELEVENLABS_API_KEY environment variable is
	                missing or empty.
	        """
	        # The key must come from the environment only: a key embedded in the
	        # source is a leaked credential and makes this check unreachable.
	        api_key = os.getenv("ELEVENLABS_API_KEY")
	        if not api_key:
	            raise RuntimeError("ELEVENLABS_API_KEY 환경변수를 설정해주세요.")
	        self.client = ElevenLabs(api_key=api_key)
	        os.makedirs(self.TMP_DIR, exist_ok=True)

	    def clone_voice(self, audio, name, description=None):
	        """Create a cloned voice called ``name`` unless one already exists.

	        Args:
	            audio: Reference audio sample; passed to the client as the only
	                entry of ``files``.
	            name: Name of the voice to look up or create.
	            description: Optional description forwarded to the client.

	        Returns:
	            str: A status message. Failures of the clone request are reported
	            through this string (the exception text) instead of being raised.
	        """
	        if self._get_voice(name) is not None:
	            return "존재하는 음성입니다. 음성 생성을 시작해주세요."

	        try:
	            self.client.clone(
	                name=name,
	                description=description,  # Optional
	                files=[audio],
	            )
	        except Exception as e:
	            # Deliberately best-effort: the caller gets the error as text.
	            return str(e)
	        return "Voice Clone을 성공적으로 생성했습니다."

	    def _get_voice(self, name: str):
	        """Return the first listed voice whose name equals ``name``, else None."""
	        voices = self.client.voices.get_all().voices
	        return next((voice for voice in voices if voice.name == name), None)

	    def generate_voice(
	        self,
	        text: str,
	        audio: Optional[str] = None,
	        language: str = "ko",
	        mute_before_ms: Optional[int] = 0,
	        mute_after_ms: Optional[int] = 0,
	        stability: float = 0.5,
	        similarity_boost: float = 0.75,
	        style: float = 0.0,
	        use_speaker_boost=True,
	    ) -> str:
	        """Synthesize ``text`` to an MP3 file and return the file's path.

	        Args:
	            text: Text to speak.
	            audio: Path of a reference recording. Its file stem is used as the
	                voice name, and the voice is cloned if it does not exist yet.
	                When None, the default voice is used.
	            language: Forwarded to ``VoiceSettings`` as ``language``.
	            mute_before_ms: Silence prepended to the speech, in milliseconds.
	                None is treated as 0.
	            mute_after_ms: Silence appended to the speech, in milliseconds.
	                None is treated as 0.
	            stability: Forwarded to ``VoiceSettings``.
	            similarity_boost: Forwarded to ``VoiceSettings``.
	            style: Forwarded to ``VoiceSettings``.
	            use_speaker_boost: Forwarded to ``VoiceSettings``.

	        Returns:
	            str: Path of the exported MP3 file inside ``./tmp``.

	        Raises:
	            RuntimeError: If the requested voice cannot be found, for example
	                because cloning failed.
	        """
	        clone_status = None
	        if audio is not None:
	            name = Path(audio).stem
	            # Keep the status so a failed clone can be reported below.
	            clone_status = self.clone_voice(audio, name)
	        else:
	            gr.Info("음성이 안주어졌습니다. 기본 음성으로 생성하겠습니다.", duration=2)
	            name = self.DEFAULT_VOICE_NAME

	        current_voice = self._get_voice(name)
	        if current_voice is None:
	            # Fail with the reason instead of an AttributeError on None.
	            detail = f": {clone_status}" if clone_status else ""
	            raise RuntimeError(f"Voice '{name}' is not available{detail}")

	        # NOTE(review): `language` is passed into VoiceSettings as in the
	        # original code - confirm the installed SDK accepts that field.
	        response = self.client.generate(
	            text=text,
	            model=ELEVEN_LABS_MODEL,
	            voice=Voice(
	                voice_id=current_voice.voice_id,
	                settings=VoiceSettings(
	                    stability=stability,
	                    similarity_boost=similarity_boost,
	                    style=style,
	                    use_speaker_boost=use_speaker_boost,
	                    language=language,
	                ),
	            ),
	        )

	        # Collect the non-empty chunks in memory and decode them as MP3.
	        audio_bytes = b"".join(chunk for chunk in response if chunk)
	        audio_segment = AudioSegment.from_file(BytesIO(audio_bytes), format="mp3")

	        # Pad with silence; the parameters are Optional, so map None to 0.
	        mute_before = AudioSegment.silent(duration=mute_before_ms or 0)
	        mute_after = AudioSegment.silent(duration=mute_after_ms or 0)
	        combined_segment = mute_before + audio_segment + mute_after

	        tmp_file = generate_random_filename(self.TMP_DIR, "mp3")
	        combined_segment.export(tmp_file, format="mp3", bitrate="128k")

	        return tmp_file