QLWD committed on
Commit
e4591db
1 Parent(s): ceffe1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -25
app.py CHANGED
@@ -86,29 +86,27 @@ def calculate_overlap(start1, end1, start2, end2):
86
  overlap_duration = max(0, overlap_end - overlap_start)
87
  return overlap_duration
88
 
89
- # 获取目标时间段和说话人时间段的重叠比例
90
- def get_matching_segments(target_time, diarization_output):
91
- target_start_time = target_time['start_time']
92
- target_end_time = target_time['end_time']
93
-
94
- # 获取该说话人时间段的信息,排除目标录音时间段
95
  speaker_segments = {}
96
- for speech_turn in diarization_output.itertracks(yield_label=True): # 使用 itertracks 获取每个说话人的信息
 
 
97
  start_seconds = speech_turn[0].start
98
  end_seconds = speech_turn[0].end
99
  label = speech_turn[1]
100
 
101
- # 计算目标音频时间段和说话人时间段的重叠时间
102
- overlap = calculate_overlap(target_start_time, target_end_time, start_seconds, end_seconds)
 
 
103
 
104
- # 如果存在重叠,排除目标音频时间段
105
- if overlap > 0:
106
- if label not in speaker_segments:
107
- speaker_segments[label] = []
108
-
109
- # 如果时间段与目标音频有重叠,跳过该时间段
110
- if start_seconds >= target_end_time or end_seconds <= target_start_time:
111
- speaker_segments[label].append((start_seconds, end_seconds))
112
 
113
  return speaker_segments
114
 
@@ -126,20 +124,23 @@ def process_audio(target_audio, mixed_audio):
126
  if isinstance(diarization_result, str) and diarization_result.startswith("错误"):
127
  return diarization_result, None # 出错时返回错误信息
128
  else:
129
- # 获取该说话人的所有匹配时间段(排除目标音频时间段)
130
- matching_segments = get_matching_segments(time_dict, diarization_result)
 
 
 
131
 
132
- if matching_segments:
133
- # 返回匹配的说话人标签和他们的时间段
134
- return matching_segments
135
  else:
136
- return "没有找到匹配的说话人时间段。"
137
 
138
  # Gradio 接口
139
  with gr.Blocks() as demo:
140
  gr.Markdown("""
141
  # 🗣️ 音频拼接与说话人分类 🗣️
142
- 上传目标音频和混合音频,拼接并进行说话人分类。结果包括所有匹配说话人的时间段(排除目标录音时间段)。
143
  """)
144
 
145
  mixed_audio_input = gr.Audio(type="filepath", label="上传混合音频")
@@ -148,7 +149,7 @@ with gr.Blocks() as demo:
148
  process_button = gr.Button("处理音频")
149
 
150
  # 输出结果
151
- diarization_output = gr.Textbox(label="匹配的说话人时间段")
152
 
153
  # 点击按钮时触发处理音频
154
  process_button.click(
 
86
  overlap_duration = max(0, overlap_end - overlap_start)
87
  return overlap_duration
88
 
89
+ # 获取所有说话人时间段(排除目标录音时间段)
90
+ def get_all_speaker_segments(diarization_output, target_start_time, target_end_time, final_audio_length):
 
 
 
 
91
  speaker_segments = {}
92
+
93
+ # 使用 itertracks 获取每个说话人的信息
94
+ for speech_turn in diarization_output.itertracks(yield_label=True):
95
  start_seconds = speech_turn[0].start
96
  end_seconds = speech_turn[0].end
97
  label = speech_turn[1]
98
 
99
+ # 检查时间段是否与目标录音重叠
100
+ if start_seconds < target_end_time and end_seconds > target_start_time:
101
+ # 如果时间段与目标音频有重叠,调整结束时间
102
+ end_seconds = min(end_seconds, final_audio_length)
103
 
104
+ # 存储说话人的时间段
105
+ if label not in speaker_segments:
106
+ speaker_segments[label] = []
107
+
108
+ # 添加处理后的时间段
109
+ speaker_segments[label].append((start_seconds, end_seconds))
 
 
110
 
111
  return speaker_segments
112
 
 
124
  if isinstance(diarization_result, str) and diarization_result.startswith("错误"):
125
  return diarization_result, None # 出错时返回错误信息
126
  else:
127
+ # 获取拼接后的音频长度
128
+ final_audio_length = len(AudioSegment.from_wav("final_output.wav")) / 1000 # 秒为单位
129
+
130
+ # 获取所有说话人的时间段
131
+ speaker_segments = get_all_speaker_segments(diarization_result, time_dict['start_time'], time_dict['end_time'], final_audio_length)
132
 
133
+ if speaker_segments:
134
+ # 返回所有说话人的时间段
135
+ return speaker_segments
136
  else:
137
+ return "没有找到任何说话人的时间段。"
138
 
139
  # Gradio 接口
140
  with gr.Blocks() as demo:
141
  gr.Markdown("""
142
  # 🗣️ 音频拼接与说话人分类 🗣️
143
+ 上传目标音频和混合音频,拼接并进行说话人分类。结果包括所有说话人的时间段(排除目标录音时间段)。
144
  """)
145
 
146
  mixed_audio_input = gr.Audio(type="filepath", label="上传混合音频")
 
149
  process_button = gr.Button("处理音频")
150
 
151
  # 输出结果
152
+ diarization_output = gr.Textbox(label="说话人时间段")
153
 
154
  # 点击按钮时触发处理音频
155
  process_button.click(