|
import glob
import io
import os
import tempfile

import gradio as gr
import openai
import yt_dlp
from pydub import AudioSegment
|
|
|
def split_audio(file_path, chunk_length_ms):
    """Load an audio file and cut it into consecutive fixed-length chunks.

    Args:
        file_path: Path of the audio file, handed to ``AudioSegment.from_file``.
        chunk_length_ms: Length of every chunk; must be greater than zero.
            The name and the slicing below treat it as milliseconds.

    Returns:
        list: The slices of the loaded audio in playback order. The last
        slice is shorter when the total length is not a multiple of
        ``chunk_length_ms``; the list is empty for zero-length audio.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
    """
    # Validate before loading anything: a zero or negative step would never
    # advance start_time, so the loop below would spin forever.
    if chunk_length_ms <= 0:
        raise ValueError(
            "chunk_length_ms must be positive, got {!r}".format(chunk_length_ms)
        )

    audio = AudioSegment.from_file(file_path)
    duration = len(audio)

    chunks = []
    start_time = 0
    while start_time < duration:
        # Clamp the final slice to the end of the audio.
        end_time = min(start_time + chunk_length_ms, duration)
        chunks.append(audio[start_time:end_time])
        start_time += chunk_length_ms
    return chunks
|
|
|
def split_string_by_tokens(text, max_tokens=500):
    """Break *text* into pieces of at most ``max_tokens`` words each.

    A "token" here is a whitespace-separated word as produced by
    ``str.split()``, so runs of whitespace collapse to single spaces in the
    returned pieces.

    Args:
        text: The string to split.
        max_tokens: Word count at which a piece is closed. A piece always
            holds at least one word, so values below 1 act like 1.

    Returns:
        list[str]: The pieces in order, words joined by single spaces; empty
        when *text* contains no words.
    """
    def _word_groups():
        pending = []
        for token in text.split():
            pending.append(token)
            if len(pending) >= max_tokens:
                yield pending
                pending = []
        # Whatever is left forms the final, shorter piece.
        if pending:
            yield pending

    return [' '.join(group) for group in _word_groups()]
|
|
|
# Configure the OpenAI client from the environment at import time. A missing
# OPENAI_API_KEY variable raises KeyError here, before the UI is built.
openai.api_key = os.environ['OPENAI_API_KEY']
|
|
|
def _remove_quietly(path):
    """Delete *path*, ignoring failures just as the former ``rm`` call did."""
    try:
        os.remove(path)
    except OSError:
        # Best-effort cleanup: a missing or undeletable file must not abort
        # the request that triggered the cleanup.
        pass


def _transcribe_chunk(chunk):
    """Export one audio chunk to a temporary WAV file and return its transcript text."""
    # Reserve a path and close the handle first, so the file is not held open
    # by this process while the chunk is exported to the same path.
    with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav", delete=False) as handle:
        wav_path = handle.name
    try:
        chunk.export(wav_path, format="wav")
        with open(wav_path, "rb") as wav_file:
            response = openai.Audio.transcribe("whisper-1", wav_file)
    finally:
        # Remove the temp file even when export or transcription fails.
        os.remove(wav_path)
    return response["text"]


def asr(url):
    """Download a video's audio, transcribe it, and stream a Chinese translation.

    This is a generator. Every yielded value is a pair of strings matching
    the two output boxes of the interface: the first slot carries progress
    messages and, eventually, the translation; the second carries the
    transcript collected so far.

    Args:
        url: Link to the video; passed to yt-dlp unchanged.

    Yields:
        tuple[str, str]: ``(translation_or_status, transcript)``. The last
        pair holds the complete translation and the complete transcript. If
        the download fails, a single pair of error messages is yielded
        instead.
    """
    # Clear leftovers from an earlier run that ended before its cleanup.
    # Done in-process rather than through a shell command.
    for stale_path in glob.glob("*audio_download*"):
        _remove_quietly(stale_path)

    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'audio_downloaded.%(ext)s',
        # 'no_continue' is only the command-line spelling; the YoutubeDL
        # option that disables resuming partial downloads is 'continuedl'.
        'continuedl': False,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=True)
    except yt_dlp.utils.DownloadError:
        info_dict = None

    if info_dict is None:
        # A value passed to `return` inside a generator is never delivered to
        # the consumer, so the error has to be yielded before stopping.
        yield "下载音频发生错误,请确认链接再试一次。", "Error downloading the audio. Check the URL and try again."
        return

    audio_file_name = "audio_downloaded.{}".format(info_dict["ext"])

    try:
        yield "下载视频完成. 开始分割视频...", ""
        chunks = split_audio(audio_file_name, chunk_length_ms=30 * 1000)

        transcripts = []
        for idx, chunk in enumerate(chunks):
            transcripts.append(_transcribe_chunk(chunk))
            yield "请耐心等待语音识别完成...({}/{})".format(idx + 1, len(chunks)), " ".join(transcripts)
    finally:
        # Runs on success, on errors, and when the generator is closed early,
        # so the download never lingers. The extension in the file name comes
        # from remote metadata and is deliberately kept out of a shell command.
        _remove_quietly(audio_file_name)

    full_transcript = " ".join(transcripts)
    transcript_chunks = split_string_by_tokens(full_transcript, max_tokens=500)
    yield "语音识别完成, 开始翻译...(0/{})".format(len(transcript_chunks)), full_transcript

    translations = []
    for idx, transcript in enumerate(transcript_chunks):
        output = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user",
                 "content": "Transcript: {transcript}. \n Translate the video conversation transcript into fluent Chinese. Chinese: ".format(transcript=transcript)},
            ],
            stream=True,
        )
        progress = "请耐心等候翻译:({}/{})...".format(idx + 1, len(transcript_chunks))
        for event in output:
            # Fall back to an empty dict / string so events without a delta
            # or without content contribute nothing instead of raising.
            delta = event["choices"][0].get("delta") or {}
            translations.append(delta.get("content") or "")
            # Emit after every streamed event so the translation grows live.
            yield progress + "".join(translations), full_transcript

    full_translation = "".join(translations)
    yield full_translation, full_transcript
|
|
|
# Heading passed to the interface as its title.
title = """
轻声细译"""

# HTML snippet passed to the interface as its description (usage instructions).
instruction = """
<div style="border: 2px solid #000; padding: 10px; border-radius: 5px;">
视频翻译 (video-translation):输入视频链接,进行中文翻译 <span style="color: grey;">-- powered by OpenAI Whisper & ChatGPT.</span>.<br>

1.将视频链接(支持Twitter、YouTube)复制粘贴至输入框,点击提交(Submit)即可;
</div>"""

# One text input (the video link) and two text outputs, matching the
# (translation, transcript) pairs yielded by `asr`. Components are created
# with gr.Textbox because the gr.inputs / gr.outputs namespaces are deprecated.
demo = gr.Interface(
    fn=asr,
    inputs=gr.Textbox(label="粘贴视频链接"),
    outputs=[
        gr.Textbox(label="翻译"),
        gr.Textbox(label="音频转录"),
    ],
    title=title,
    description=instruction,
    theme="JohnSmith9982/small_and_pretty",
)

# Enable the request queue before launching; `asr` is a generator whose
# intermediate yields are meant to reach the page as progress updates.
demo.queue()
demo.launch()
|
|