File size: 4,422 Bytes
bf9542e 2446386 1b12140 922050b 1b12140 bf9542e 922050b bf9542e 6aeab44 51a9e4f bf9542e 2446386 bf9542e 0648a36 922050b 7e868b7 922050b 7e868b7 6aeab44 922050b e6882db 922050b e6882db 922050b e6882db 922050b e6882db 922050b bf9542e 2446386 5f6bd2c 2446386 4f4216e 2446386 cc37f03 2446386 bf9542e 4f4216e 2446386 db145e2 4e41537 cc37f03 922050b cc37f03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import gradio as gr
import openai
import yt_dlp
import os
import io
import tempfile
from pydub import AudioSegment
def split_audio(file_path, chunk_length_ms):
    """Split an audio file into consecutive segments of a fixed length.

    Args:
        file_path: Path of the audio file, loaded with
            ``AudioSegment.from_file``.
        chunk_length_ms: Length of each segment in milliseconds. Must be
            greater than zero.

    Returns:
        A list of audio segments in playback order. Every segment covers
        ``chunk_length_ms`` milliseconds except possibly the last, which
        holds whatever remains. The list is empty for zero-length audio.

    Raises:
        ValueError: If ``chunk_length_ms`` is zero or negative.
    """
    # Validate before loading the file: with a non-positive step the loop
    # below would never reach the end of the audio.
    if chunk_length_ms <= 0:
        raise ValueError(
            "chunk_length_ms must be positive, got {!r}".format(chunk_length_ms)
        )

    audio = AudioSegment.from_file(file_path)
    duration = len(audio)  # pydub reports the length in milliseconds

    chunks = []
    start_time = 0
    while start_time < duration:
        # Clamp so the final segment stops at the end of the audio.
        end_time = min(start_time + chunk_length_ms, duration)
        chunks.append(audio[start_time:end_time])
        start_time += chunk_length_ms
    return chunks
def split_string_by_tokens(text, max_tokens=500):
    """Break *text* into pieces of at most ``max_tokens`` words each.

    "Tokens" here are the whitespace-separated words produced by
    ``str.split()``, not model tokens. Words inside a piece are re-joined
    with single spaces, so the original whitespace is not preserved.

    Args:
        text: The string to split.
        max_tokens: Maximum number of words per piece. A value below one
            yields one word per piece.

    Returns:
        A list of strings in original word order; empty when *text* holds
        no words.
    """
    words = text.split()
    # A limit below one still emits a piece after every single word, which
    # is the same as stepping through the words one at a time.
    step = max(max_tokens, 1)
    return [
        ' '.join(words[offset:offset + step])
        for offset in range(0, len(words), step)
    ]
# Configure the OpenAI client from the environment when the module is loaded;
# a missing OPENAI_API_KEY variable raises KeyError right here.
openai.api_key = os.environ['OPENAI_API_KEY']
def asr(url):
    """Download a video's audio, transcribe it, and translate it to Chinese.

    This is a generator: each yielded pair updates the two output fields,
    so progress is visible while the work is still running.

    The audio is always downloaded to ``audio_downloaded.<ext>`` in the
    current working directory, so overlapping calls would share that file.

    Args:
        url: Address of the video, passed to yt-dlp.

    Yields:
        ``(status_or_translation, transcript)`` tuples of strings. The first
        item is a progress message (with the translation so far appended
        during the translation phase) and finally the full translation. The
        second item is the transcript recognised so far.
    """
    import glob  # local import: only needed for the cleanup just below

    # Remove leftovers from earlier runs so a stale file is never picked up.
    for stale_path in glob.glob("*audio_download*"):
        try:
            os.remove(stale_path)
        except OSError:
            pass  # best effort, like the shell `rm` this replaces

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'audio_downloaded.%(ext)s',
        # `continuedl` is the YoutubeDL option behind the --no-continue flag;
        # the CLI flag name itself is not a recognised option key.
        'continuedl': False,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
    except yt_dlp.utils.DownloadError:
        info_dict = None
    if info_dict is None:
        # A value passed to `return` inside a generator never reaches the
        # consumer, so the error message has to be yielded.
        yield "下载音频发生错误,请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
        return
    audio_file_name = "audio_downloaded.{}".format(info_dict["ext"])

    yield "下载视频完成. 开始分割视频...", ""
    chunks = split_audio(audio_file_name, chunk_length_ms=30 * 1000)

    transcripts = []
    for idx, chunk in enumerate(chunks):
        # Reserve a temp path, then close the handle before writing to it.
        with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav", delete=False) as temp_file:
            temp_file_path = temp_file.name
        try:
            chunk.export(temp_file_path, format="wav")
            with open(temp_file_path, "rb") as wav_file:
                transcript = openai.Audio.transcribe("whisper-1", wav_file)
        finally:
            # Do not leak the temp file when export or transcription fails.
            os.remove(temp_file_path)
        transcripts.append(transcript["text"])
        yield "请耐心等待语音识别完成...({}/{})".format(idx + 1, len(chunks)), " ".join(transcripts)

    # The downloaded audio is no longer needed once every chunk is transcribed.
    try:
        os.remove(audio_file_name)
    except OSError:
        pass

    translations = []
    full_transcript = " ".join(transcripts)
    # Translate in pieces of at most 500 words to keep each request small.
    transcript_chunks = split_string_by_tokens(full_transcript, max_tokens=500)
    yield "语音识别完成, 开始翻译...(0/{})".format(len(transcript_chunks)), full_transcript

    for idx, transcript in enumerate(transcript_chunks):
        output = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user",
                 "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript)},
            ],
            stream=True,
        )
        for event in output:
            # Some streamed events carry no delta or no content; treat both
            # as an empty fragment instead of failing on a missing key.
            delta = event["choices"][0].get("delta") or {}
            translations.append(delta.get("content") or "")
            # Yield after every fragment so the translation appears as it
            # is streamed.
            yield "请耐心等候翻译:({}/{})...".format(idx+1, len(transcript_chunks)) + "".join(translations), " ".join(transcripts)

    full_translation = "".join(translations)
    yield full_translation, full_transcript
# Heading shown at the top of the page.
title = """
轻声细译"""
# HTML usage notes passed to the interface as its description.
instruction = """
<div style="border: 2px solid #000; padding: 10px; border-radius: 5px;">
视频翻译 (video-translation):输入视频链接,进行中文翻译 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>
1.将视频链接(支持Twitter、YouTube)复制粘贴至输入框,点击提交(Submit)即可;
</div>"""
# One text input (the video URL) and two text outputs (translation and
# transcript), matching the two-item tuples yielded by `asr`.
# `gr.Textbox` is used for both directions; the `gr.inputs` / `gr.outputs`
# namespaces are deprecated aliases of the same component.
demo = gr.Interface(
    fn=asr,
    inputs=gr.Textbox(label="粘贴视频链接"),
    outputs=[
        gr.Textbox(label="翻译"),
        gr.Textbox(label="音频转录"),
    ],
    title=title,
    description=instruction,
    theme="JohnSmith9982/small_and_pretty",
)
# `asr` is a generator, so the queue is enabled before launching to let its
# intermediate results reach the page.
demo.queue()
demo.launch()
|