Spaces:
Runtime error
Runtime error
improve aesthetics
Browse files
app.py
CHANGED
@@ -54,6 +54,9 @@ def divide_into_30s_segments(audio_fpath, seconds_max):
|
|
54 |
|
55 |
return segment_paths, segment_start_times
|
56 |
|
|
|
|
|
|
|
57 |
|
58 |
def transcribe(audio, url, seconds_max):
|
59 |
if url:
|
@@ -61,13 +64,13 @@ def transcribe(audio, url, seconds_max):
|
|
61 |
segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
|
62 |
|
63 |
audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio(sampling_rate=16000))
|
64 |
-
print(audio_dataset)
|
65 |
-
print(audio_dataset[0])
|
66 |
pred = pipe(audio_dataset["audio"])
|
67 |
text = ""
|
68 |
n_segments = len(segment_start_times)
|
69 |
for i, (seconds, output) in enumerate(zip(segment_start_times, pred)):
|
70 |
-
text += f"[Segment {i}/{n_segments}, start time {get_timestamp(seconds)}]\n
|
|
|
|
|
71 |
return text
|
72 |
|
73 |
else:
|
@@ -77,9 +80,9 @@ def transcribe(audio, url, seconds_max):
|
|
77 |
iface = gr.Interface(
|
78 |
fn=transcribe,
|
79 |
inputs=[
|
80 |
-
gr.Audio(source="microphone", type="filepath"),
|
81 |
-
gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed", label="YouTube URL"),
|
82 |
-
gr.Slider(minimum=30, maximum=300, value=30, step=30, label="Number of seconds to transcribe")
|
83 |
],
|
84 |
outputs="text",
|
85 |
title="Whisper Small Swedish",
|
|
|
54 |
|
55 |
return segment_paths, segment_start_times
|
56 |
|
57 |
+
def get_translation(text):
|
58 |
+
# TODO: Make API call to Google Translate to get English translation
|
59 |
+
return "..."
|
60 |
|
61 |
def transcribe(audio, url, seconds_max):
|
62 |
if url:
|
|
|
64 |
segment_paths, segment_start_times = divide_into_30s_segments(fpath, seconds_max)
|
65 |
|
66 |
audio_dataset = Dataset.from_dict({"audio": segment_paths}).cast_column("audio", Audio(sampling_rate=16000))
|
|
|
|
|
67 |
pred = pipe(audio_dataset["audio"])
|
68 |
text = ""
|
69 |
n_segments = len(segment_start_times)
|
70 |
for i, (seconds, output) in enumerate(zip(segment_start_times, pred)):
|
71 |
+
text += f"[Segment {i+1}/{n_segments}, start time {get_timestamp(seconds)}]\n"
|
72 |
+
text += f"{output['text']}\n"
|
73 |
+
text += f"[Translation]\n{get_translation(output['text'])}\n\n"
|
74 |
return text
|
75 |
|
76 |
else:
|
|
|
80 |
iface = gr.Interface(
|
81 |
fn=transcribe,
|
82 |
inputs=[
|
83 |
+
gr.Audio(source="microphone", type="filepath", label="Transcribe from Microphone"),
|
84 |
+
gr.Text(max_lines=1, placeholder="Enter YouTube Link with Swedish speech to be transcribed", label="Transcribe from YouTube URL"),
|
85 |
+
gr.Slider(minimum=30, maximum=300, value=30, step=30, label="Number of seconds to transcribe from YouTube URL")
|
86 |
],
|
87 |
outputs="text",
|
88 |
title="Whisper Small Swedish",
|