QLWD committed
Commit 0bca3ec
1 Parent(s): 3152b48

Update app.py

Files changed (1)
  1. app.py +24 -20
app.py CHANGED
@@ -64,7 +64,8 @@ def diarize_audio(temp_file):
         diarization = pipeline(temp_file)
     except Exception as e:
         return f"Error processing audio: {e}"
-
+
+    print(diarization)
     # Return the diarization object
     return diarization
 
@@ -85,12 +86,12 @@ def calculate_overlap(start1, end1, start2, end2):
     return overlap_duration
 
 # Get the overlap between the target time span and each speaker segment
-def get_best_match(target_time, diarization_output):
+def get_matching_segments(target_time, diarization_output):
     target_start_time = target_time['start_time']
     target_end_time = target_time['end_time']
 
-    # Get the speaker information from diarization_output
-    speaker_segments = []
+    # Collect this speaker's segments, excluding the target recording's time span
+    speaker_segments = {}
     for speech_turn in diarization_output.itertracks(yield_label=True):  # Use itertracks to get each speaker's turns
         start_seconds = speech_turn[0].start
         end_seconds = speech_turn[0].end
@@ -98,15 +99,17 @@ def get_best_match(target_time, diarization_output):
 
         # Compute the overlap between the target audio's time span and this speaker segment
         overlap = calculate_overlap(target_start_time, target_end_time, start_seconds, end_seconds)
-        overlap_ratio = overlap / (end_seconds - start_seconds)
 
-        # Record the speaker label and overlap ratio
-        speaker_segments.append((label, overlap_ratio, start_seconds, end_seconds))
+        # If there is any overlap, exclude the target audio's own time span
+        if overlap > 0:
+            if label not in speaker_segments:
+                speaker_segments[label] = []
+
+            # Skip segments that overlap the target audio's time span
+            if start_seconds >= target_end_time or end_seconds <= target_start_time:
+                speaker_segments[label].append((start_seconds, end_seconds))
 
-    # Sort by overlap ratio and return the segment with the largest overlap
-    best_match = max(speaker_segments, key=lambda x: x[1], default=None)
-
-    return best_match
+    return speaker_segments
 
 # Process the audio files and return the output
 def process_audio(target_audio, mixed_audio):
@@ -122,18 +125,20 @@ def process_audio(target_audio, mixed_audio):
     if isinstance(diarization_result, str) and diarization_result.startswith("Error"):
         return diarization_result, None  # Return the error message on failure
     else:
-        # Get the best-matching speaker label and time span
-        best_match = get_best_match(time_dict, diarization_result)
+        # Get all of the speaker's matching segments (excluding the target audio's time span)
+        matching_segments = get_matching_segments(time_dict, diarization_result)
 
-        if best_match:
-            # Return the best-matching speaker's label and time span
-            return best_match[0], best_match[2], best_match[3]
+        if matching_segments:
+            # Return the matching speaker labels and their segments
+            return matching_segments
+        else:
+            return "No matching speaker segments were found."
 
 # Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("""
     # 🗣️ Audio Concatenation and Speaker Diarization 🗣️
-    Upload the target audio and the mixed audio; they are concatenated and speaker diarization is run. The result includes the best-matching speaker's time span.
+    Upload the target audio and the mixed audio; they are concatenated and speaker diarization is run. The result includes all matching speakers' segments (excluding the target recording's time span).
     """)
 
     mixed_audio_input = gr.Audio(type="filepath", label="Upload mixed audio")
@@ -142,14 +147,13 @@ with gr.Blocks() as demo:
     process_button = gr.Button("Process audio")
 
     # Output
-    diarization_output = gr.Textbox(label="Best-matching speaker")
-    time_range_output = gr.Textbox(label="Best-matching time span")
+    diarization_output = gr.Textbox(label="Matching speaker segments")
 
     # Trigger audio processing when the button is clicked
     process_button.click(
         fn=process_audio,
         inputs=[target_audio_input, mixed_audio_input],
-        outputs=[diarization_output, time_range_output]
+        outputs=[diarization_output]
     )
 
 demo.launch(share=True)
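
The first hunk touches diarize_audio, whose pipeline object is created outside the changed lines. For context, a minimal sketch of how such a pipeline is typically constructed with pyannote.audio; the model id and token below are placeholders, not taken from this commit:

```python
from pyannote.audio import Pipeline

# Assumed setup (not part of this diff): a pretrained pyannote speaker
# diarization pipeline, gated behind a Hugging Face access token.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",  # placeholder model id
    use_auth_token="YOUR_HF_TOKEN",      # placeholder token
)

# diarize_audio then simply calls pipeline(temp_file) and returns the
# resulting annotation, as the hunk above shows.
```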
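
calculate_overlap(start1, end1, start2, end2) is called in the changed hunks but its body lies outside the diff context. A minimal sketch of what it presumably computes, assuming all four arguments are times in seconds and the return value is the overlap duration used above:

```python
def calculate_overlap(start1, end1, start2, end2):
    # Overlap duration (in seconds) of the intervals [start1, end1]
    # and [start2, end2]; zero when they do not intersect.
    overlap_start = max(start1, start2)
    overlap_end = min(end1, end2)
    overlap_duration = max(0.0, overlap_end - overlap_start)
    return overlap_duration
```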
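
get_matching_segments iterates the diarization result with itertracks(yield_label=True), which yields (segment, track, label) triples; the unpacking of label happens just above the changed hunk and is not shown here. A self-contained sketch of the input the function expects and the dict shape it now returns, using hypothetical segments and speaker labels:

```python
from pyannote.core import Annotation, Segment

# Hand-built stand-in for a diarization result (hypothetical values).
diarization = Annotation()
diarization[Segment(0.0, 3.0)] = "SPEAKER_00"
diarization[Segment(3.0, 8.0)] = "SPEAKER_01"
diarization[Segment(9.0, 12.0)] = "SPEAKER_00"

# itertracks(yield_label=True) yields (segment, track, label), which is why
# the loop in app.py reads speech_turn[0].start and speech_turn[0].end.
for segment, track, label in diarization.itertracks(yield_label=True):
    print(label, segment.start, segment.end)

# The target window mirrors the time_dict passed inside process_audio.
time_dict = {"start_time": 0.0, "end_time": 3.0}

# get_matching_segments(time_dict, diarization) now returns a dict keyed by
# speaker label, each value a list of (start_seconds, end_seconds) tuples,
# or an empty dict when no speaker overlaps the target window.
```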
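
process_audio now returns either this dict or a plain string into a single gr.Textbox, which Gradio displays via str(). If a more readable rendering is wanted, a small helper could format the dict before it is returned; format_segments below is a hypothetical illustration, not part of the commit:

```python
def format_segments(matching_segments):
    # Pass error / "not found" strings through unchanged.
    if isinstance(matching_segments, str):
        return matching_segments
    # Render {label: [(start, end), ...]} as one line per speaker,
    # e.g. "SPEAKER_00: 12.3s-15.8s, 20.1s-22.0s".
    lines = []
    for label, segments in matching_segments.items():
        spans = ", ".join(f"{start:.1f}s-{end:.1f}s" for start, end in segments)
        lines.append(f"{label}: {spans}")
    return "\n".join(lines)
```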