einanao committed on
Commit
395a84b
1 Parent(s): e7b3a9f

add spinners

Browse files
Files changed (1) hide show
  1. app.py +22 -14
app.py CHANGED
@@ -166,7 +166,8 @@ def strike(url, speedup_factor, min_speedup, max_speedup, max_num_segments):
166
 
167
  min_speedup = max(0.5, min_speedup) # ffmpeg limit
168
 
169
- name = download(url, YDL_OPTS)
 
170
  assert name.endswith(".m4a")
171
  name = name.split(".m4a")[0].split("/")[-1]
172
 
@@ -174,13 +175,15 @@ def strike(url, speedup_factor, min_speedup, max_speedup, max_num_segments):
174
  transcript_path = os.path.join(DATA_DIR, "%s.json" % name)
175
  output_path = os.path.join(DATA_DIR, "%s_smooth.mp3" % name)
176
 
177
- segments = transcribe(audio_path, transcript_path)
 
178
 
179
  seg_durations = compute_seg_durations(segments)
180
 
181
- info_densities = compute_info_densities(
182
- segments, seg_durations, llm, tokenizer, device
183
- )
 
184
 
185
  total_duration = segments[-1]["end"] - segments[0]["start"]
186
  min_sec_leaf = total_duration / max_num_segments
@@ -201,7 +204,8 @@ def strike(url, speedup_factor, min_speedup, max_speedup, max_num_segments):
201
  total_duration,
202
  )
203
 
204
- cat_clips(squashed_times, speedups, audio_path, output_path)
 
205
 
206
  spedup_total_duration, actual_speedup_factor = compute_actual_speedup(
207
  squashed_durations, speedups, total_duration
@@ -216,19 +220,24 @@ def strike(url, speedup_factor, min_speedup, max_speedup, max_num_segments):
216
  data = [times, info_densities / np.log(2), annotations]
217
  cols = ["time (minutes)", "bits per second", "transcript"]
218
  df = pd.DataFrame(list(zip(*data)), columns=cols)
219
-
 
220
  lines = (
221
  alt.Chart(df, title="information rate")
222
  .mark_line(color="gray", opacity=0.5)
223
  .encode(
224
- x=cols[0],
225
  y=cols[1],
226
  )
227
  )
228
  dots = (
229
  alt.Chart(df)
230
  .mark_circle(size=50, opacity=1)
231
- .encode(x=cols[0], y=cols[1], tooltip=["transcript"])
 
 
 
 
232
  )
233
  st.altair_chart((lines + dots).interactive(), use_container_width=True)
234
 
@@ -245,7 +254,7 @@ def strike(url, speedup_factor, min_speedup, max_speedup, max_num_segments):
245
  alt.Chart(df, title="speedup based on information rate")
246
  .mark_line()
247
  .encode(
248
- x=cols[0],
249
  y=alt.Y(
250
  cols[1],
251
  scale=alt.Scale(
@@ -274,9 +283,8 @@ with st.form("my_form"):
274
  if submitted:
275
  st.write("original video:")
276
  st.video(url)
277
- with st.spinner("processing audio..."):
278
- output_path = strike(
279
- url, speedup_factor, min_speedup, max_speedup, max_num_segments
280
- )
281
  st.write("processed audio:")
282
  st.audio(output_path)
 
166
 
167
  min_speedup = max(0.5, min_speedup) # ffmpeg limit
168
 
169
+ with st.spinner("downloading..."):
170
+ name = download(url, YDL_OPTS)
171
  assert name.endswith(".m4a")
172
  name = name.split(".m4a")[0].split("/")[-1]
173
 
 
175
  transcript_path = os.path.join(DATA_DIR, "%s.json" % name)
176
  output_path = os.path.join(DATA_DIR, "%s_smooth.mp3" % name)
177
 
178
+ with st.spinner("transcribing..."):
179
+ segments = transcribe(audio_path, transcript_path)
180
 
181
  seg_durations = compute_seg_durations(segments)
182
 
183
+ with st.spinner("calculating information density..."):
184
+ info_densities = compute_info_densities(
185
+ segments, seg_durations, llm, tokenizer, device
186
+ )
187
 
188
  total_duration = segments[-1]["end"] - segments[0]["start"]
189
  min_sec_leaf = total_duration / max_num_segments
 
204
  total_duration,
205
  )
206
 
207
+ with st.spinner("stitching segments..."):
208
+ cat_clips(squashed_times, speedups, audio_path, output_path)
209
 
210
  spedup_total_duration, actual_speedup_factor = compute_actual_speedup(
211
  squashed_durations, speedups, total_duration
 
220
  data = [times, info_densities / np.log(2), annotations]
221
  cols = ["time (minutes)", "bits per second", "transcript"]
222
  df = pd.DataFrame(list(zip(*data)), columns=cols)
223
+ min_time = segments[0]["start"] / 60
224
+ max_time = segments[-1]["end"] / 60
225
  lines = (
226
  alt.Chart(df, title="information rate")
227
  .mark_line(color="gray", opacity=0.5)
228
  .encode(
229
+ x=alt.X(cols[0], scale=alt.Scale(domain=(min_time, max_time))),
230
  y=cols[1],
231
  )
232
  )
233
  dots = (
234
  alt.Chart(df)
235
  .mark_circle(size=50, opacity=1)
236
+ .encode(
237
+ x=alt.X(cols[0], scale=alt.Scale(domain=(min_time, max_time))),
238
+ y=cols[1],
239
+ tooltip=["transcript"],
240
+ )
241
  )
242
  st.altair_chart((lines + dots).interactive(), use_container_width=True)
243
 
 
254
  alt.Chart(df, title="speedup based on information rate")
255
  .mark_line()
256
  .encode(
257
+ x=alt.X(cols[0], scale=alt.Scale(domain=(min_time, max_time))),
258
  y=alt.Y(
259
  cols[1],
260
  scale=alt.Scale(
 
283
  if submitted:
284
  st.write("original video:")
285
  st.video(url)
286
+ output_path = strike(
287
+ url, speedup_factor, min_speedup, max_speedup, max_num_segments
288
+ )
 
289
  st.write("processed audio:")
290
  st.audio(output_path)