Update app.py
Browse files
app.py
CHANGED
@@ -5,9 +5,11 @@ import os
|
|
5 |
from pyannote.audio import Pipeline
|
6 |
from pydub import AudioSegment
|
7 |
|
8 |
-
#
|
9 |
HF_TOKEN = os.environ.get("HUGGINGFACE_READ_TOKEN")
|
10 |
pipeline = None
|
|
|
|
|
11 |
try:
|
12 |
pipeline = Pipeline.from_pretrained(
|
13 |
"pyannote/speaker-diarization-3.1", use_auth_token=HF_TOKEN
|
@@ -18,7 +20,7 @@ except Exception as e:
|
|
18 |
print(f"Error initializing pipeline: {e}")
|
19 |
pipeline = None
|
20 |
|
21 |
-
#
|
22 |
def combine_audio_with_time(target_audio, mixed_audio):
|
23 |
if pipeline is None:
|
24 |
return "错误: 模型未初始化"
|
@@ -38,8 +40,8 @@ def combine_audio_with_time(target_audio, mixed_audio):
|
|
38 |
# 将目标说话人的音频片段添加到混合音频的最后
|
39 |
mixed_audio_segment + target_audio_segment
|
40 |
|
41 |
-
#
|
42 |
-
return target_start_time, target_end_time
|
43 |
|
44 |
# 使用 pyannote/speaker-diarization 对拼接后的音频进行说话人分离
|
45 |
@spaces.GPU(duration=60 * 2) # 使用 GPU 加速,限制执行时间为 120 秒
|
@@ -92,18 +94,18 @@ def timestamp_to_seconds(timestamp):
|
|
92 |
|
93 |
# 处理音频文件并返回输出
|
94 |
def process_audio(target_audio, mixed_audio):
|
95 |
-
#
|
96 |
-
|
97 |
|
98 |
# 执行说话人分离
|
99 |
-
diarization_result = diarize_audio(
|
100 |
|
101 |
if diarization_result.startswith("错误"):
|
102 |
return diarization_result, None, None # 出错时返回错误信息
|
103 |
else:
|
104 |
# 生成标签文件
|
105 |
label_file = generate_labels_from_diarization(diarization_result)
|
106 |
-
return diarization_result, label_file,
|
107 |
|
108 |
# Gradio 接口
|
109 |
with gr.Blocks() as demo:
|
@@ -129,4 +131,4 @@ with gr.Blocks() as demo:
|
|
129 |
outputs=[diarization_output, label_file_link, time_range_output]
|
130 |
)
|
131 |
|
132 |
-
demo.launch(share=
|
|
|
5 |
from pyannote.audio import Pipeline
|
6 |
from pydub import AudioSegment
|
7 |
|
8 |
+
# 获取 Hugging Face 认证令牌
|
9 |
HF_TOKEN = os.environ.get("HUGGINGFACE_READ_TOKEN")
|
10 |
pipeline = None
|
11 |
+
|
12 |
+
# 尝试加载 pyannote 模型
|
13 |
try:
|
14 |
pipeline = Pipeline.from_pretrained(
|
15 |
"pyannote/speaker-diarization-3.1", use_auth_token=HF_TOKEN
|
|
|
20 |
print(f"Error initializing pipeline: {e}")
|
21 |
pipeline = None
|
22 |
|
23 |
+
# 音频拼接函数:拼接目标音频和混合音频,返回目标音频的起始时间和结束时间作为字典
|
24 |
def combine_audio_with_time(target_audio, mixed_audio):
|
25 |
if pipeline is None:
|
26 |
return "错误: 模型未初始化"
|
|
|
40 |
# 将目标说话人的音频片段添加到混合音频的最后
|
41 |
mixed_audio_segment + target_audio_segment
|
42 |
|
43 |
+
# 返回字典,包含目标音频的起始和结束时间
|
44 |
+
return {"start_time": target_start_time, "end_time": target_end_time}
|
45 |
|
46 |
# 使用 pyannote/speaker-diarization 对拼接后的音频进行说话人分离
|
47 |
@spaces.GPU(duration=60 * 2) # 使用 GPU 加速,限制执行时间为 120 秒
|
|
|
94 |
|
95 |
# 处理音频文件并返回输出
|
96 |
def process_audio(target_audio, mixed_audio):
    """Run the full pipeline for the Gradio UI: concatenate the target
    speaker's audio onto the mixed audio, diarize the result, and build
    the label file.

    Parameters
    ----------
    target_audio, mixed_audio:
        Audio inputs as delivered by the Gradio components.

    Returns
    -------
    tuple
        (diarization_result, label_file, time_dict) on success, or
        (error_message, None, None) when any step fails.
    """
    # Concatenate the audio; on success this returns a dict with
    # "start_time"/"end_time" for the appended target segment, but it
    # returns an error string (prefixed "错误") when the pipeline failed
    # to initialize.
    time_dict = combine_audio_with_time(target_audio, mixed_audio)

    # Bug fix: previously the error string from the combine step was
    # ignored and diarization ran anyway, surfacing the error in the
    # time-range output slot. Propagate it early instead, mirroring the
    # diarization error check below.
    if isinstance(time_dict, str) and time_dict.startswith("错误"):
        return time_dict, None, None

    # Run speaker diarization on the concatenated file (assumed to be
    # written to "final_output.wav" by the combine step — TODO confirm).
    diarization_result = diarize_audio("final_output.wav")

    if diarization_result.startswith("错误"):
        return diarization_result, None, None  # Propagate the error message
    else:
        # Build the downloadable label file from the diarization output.
        label_file = generate_labels_from_diarization(diarization_result)
        # Diarization text, label file path, and the target segment's time range.
        return diarization_result, label_file, time_dict
|
109 |
|
110 |
# Gradio 接口
|
111 |
with gr.Blocks() as demo:
|
|
|
131 |
outputs=[diarization_output, label_file_link, time_range_output]
|
132 |
)
|
133 |
|
134 |
+
demo.launch(share=True)
|