Neprox committed on
Commit
a3c12f3
1 Parent(s): 51423ee

improve aesthetics

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -54,6 +54,9 @@ def divide_into_30s_segments(audio_fpath, seconds_max):
54
 
55
  return segment_paths, segment_start_times
56
 
 
 
 
57
 
58
  def transcribe(audio, url, seconds_max):
59
  if url:
@@ -61,13 +64,13 @@ def transcribe(audio, url, seconds_max):
61
  segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
62
 
63
  audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio(sampling_rate=16000))
64
- print(audio_dataset)
65
- print(audio_dataset[0])
66
  pred = pipe(audio_dataset["audio"])
67
  text = ""
68
  n_segments = len(segment_start_times)
69
  for i, (seconds, output) in enumerate(zip(segment_start_times, pred)):
70
- text += f"[Segment {i}/{n_segments}, start time {get_timestamp(seconds)}]\n{output['text']}\n"
 
 
71
  return text
72
 
73
  else:
@@ -77,9 +80,9 @@ def transcribe(audio, url, seconds_max):
77
  iface = gr.Interface(
78
  fn=transcribe,
79
  inputs=[
80
- gr.Audio(source="microphone", type="filepath"),
81
- gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed", label="YouTube URL"),
82
- gr.Slider(minimum=30, maximum=300, value=30, step=30, label="Number of seconds to transcribe")
83
  ],
84
  outputs="text",
85
  title="Whisper Small Swedish",
 
54
 
55
  return segment_paths, segment_start_times
56
 
57
+ def get_translation(text):
58
+ # TODO: Make API call to Google Translate to get English translation
59
+ return "..."
60
 
61
  def transcribe(audio, url, seconds_max):
62
  if url:
 
64
  segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
65
 
66
  audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio(sampling_rate=16000))
 
 
67
  pred = pipe(audio_dataset["audio"])
68
  text = ""
69
  n_segments = len(segment_start_times)
70
  for i, (seconds, output) in enumerate(zip(segment_start_times, pred)):
71
+ text += f"[Segment {i+1}/{n_segments}, start time {get_timestamp(seconds)}]\n"
72
+ text += f"{output['text']}\n"
73
+ text += f"[Translation]\n{get_translation(output['text'])}\n\n"
74
  return text
75
 
76
  else:
 
80
  iface = gr.Interface(
81
  fn=transcribe,
82
  inputs=[
83
+ gr.Audio(source="microphone", type="filepath", label="Transcribe from Microphone"),
84
+ gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed", label="Transcribe from YouTube URL"),
85
+ gr.Slider(minimum=30, maximum=300, value=30, step=30, label="Number of seconds to transcribe from YouTube URL")
86
  ],
87
  outputs="text",
88
  title="Whisper Small Swedish",