Update app.py
Browse files
app.py
CHANGED
@@ -24,12 +24,22 @@ except Exception as e:
|
|
24 |
def combine_audio_with_time(target_audio, mixed_audio):
|
25 |
if pipeline is None:
|
26 |
return "错误: 模型未初始化"
|
|
|
|
|
|
|
|
|
27 |
|
28 |
# 加载目标说话人的样本音频
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
31 |
# 加载混合音频
|
32 |
-
|
|
|
|
|
|
|
33 |
|
34 |
# 记录目标说话人音频的时间点(精确到0.01秒)
|
35 |
target_start_time = len(mixed_audio_segment) / 1000 # 秒为单位,精确到 0.01 秒
|
@@ -38,9 +48,10 @@ def combine_audio_with_time(target_audio, mixed_audio):
|
|
38 |
target_end_time = target_start_time + len(target_audio_segment) / 1000 # 秒为单位
|
39 |
|
40 |
# 将目标说话人的音频片段添加到混合音频的最后
|
41 |
-
mixed_audio_segment + target_audio_segment
|
|
|
42 |
|
43 |
-
#
|
44 |
return {"start_time": target_start_time, "end_time": target_end_time}
|
45 |
|
46 |
# 使用 pyannote/speaker-diarization 对拼接后的音频进行说话人分离
|
@@ -94,6 +105,9 @@ def timestamp_to_seconds(timestamp):
|
|
94 |
|
95 |
# 处理音频文件并返回输出
|
96 |
def process_audio(target_audio, mixed_audio):
|
|
|
|
|
|
|
97 |
# 进行音频拼接并返回目标音频的起始和结束时间(作为字典)
|
98 |
time_dict = combine_audio_with_time(target_audio, mixed_audio)
|
99 |
|
|
|
24 |
def combine_audio_with_time(target_audio, mixed_audio):
|
25 |
if pipeline is None:
|
26 |
return "错误: 模型未初始化"
|
27 |
+
|
28 |
+
# 打印文件路径,确保文件正确传递
|
29 |
+
print(f"目标音频文件路径: {target_audio}")
|
30 |
+
print(f"混合音频文件路径: {mixed_audio}")
|
31 |
|
32 |
# 加载目标说话人的样本音频
|
33 |
+
try:
|
34 |
+
target_audio_segment = AudioSegment.from_wav(target_audio)
|
35 |
+
except Exception as e:
|
36 |
+
return f"加载目标音频时出错: {e}"
|
37 |
+
|
38 |
# 加载混合音频
|
39 |
+
try:
|
40 |
+
mixed_audio_segment = AudioSegment.from_wav(mixed_audio)
|
41 |
+
except Exception as e:
|
42 |
+
return f"加载混合音频时出错: {e}"
|
43 |
|
44 |
# 记录目标说话人音频的时间点(精确到0.01秒)
|
45 |
target_start_time = len(mixed_audio_segment) / 1000 # 秒为单位,精确到 0.01 秒
|
|
|
48 |
target_end_time = target_start_time + len(target_audio_segment) / 1000 # 秒为单位
|
49 |
|
50 |
# 将目标说话人的音频片段添加到混合音频的最后
|
51 |
+
final_audio = mixed_audio_segment + target_audio_segment
|
52 |
+
final_audio.export("final_output.wav", format="wav")
|
53 |
|
54 |
+
# 返回目标音频的起始时间和结束时间
|
55 |
return {"start_time": target_start_time, "end_time": target_end_time}
|
56 |
|
57 |
# 使用 pyannote/speaker-diarization 对拼接后的音频进行说话人分离
|
|
|
105 |
|
106 |
# 处理音频文件并返回输出
|
107 |
def process_audio(target_audio, mixed_audio):
|
108 |
+
# 打印文件路径,确保传入的文件有效
|
109 |
+
print(f"处理音频:目标音频: {target_audio}, 混合音频: {mixed_audio}")
|
110 |
+
|
111 |
# 进行音频拼接并返回目标音频的起始和结束时间(作为字典)
|
112 |
time_dict = combine_audio_with_time(target_audio, mixed_audio)
|
113 |
|