Spaces:

abidlabs
/

transcription-delight

Runtime error

App Files Files Community

abidlabs HF staff committed on Jul 2

Commit

add165b

•

1 Parent(s): 88322f7

changes

Browse files

Files changed (5) hide show

__pycache__/utils.cpython-312.pyc +0 -0
app.py +55 -44
clean.py +12 -0
transcribe.py +1 -0
utils.py +13 -2

__pycache__/utils.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/utils.cpython-312.pyc and b/__pycache__/utils.cpython-312.pyc differ

app.py CHANGED Viewed

@@ -4,51 +4,62 @@ import transcribe
 with gr.Blocks(theme="base") as demo:
     gr.Markdown("<center><h1> 🔊 Transcription Delight </h1></center>")
-    with gr.Tabs(selected="result") as tabs:
-        with gr.Tab("Input"):
-            with gr.Row():
-                with gr.Column():
-                    source = gr.Radio(label="Source type", choices=[("Audio", "audio"), ("Video", "video"), ("YouTube URL", "youtube")], value="audio")
-                    @gr.render(inputs=source)
-                    def show_source(s):
-                        if s == "audio":
-                            source_component = gr.Audio(type="filepath")
-                        elif s == "video":
-                            source_component = gr.Video()
-                        else:
-                            source_component = gr.Textbox(placeholder="https://www.youtube.com/watch?v=44vi31hehw4")
-                            preview = gr.HTML(label="Video preview")
-                            source_component.change(utils.convert_to_embed_url, source_component, preview)
-                        transcribe_btn.click(
-                            lambda : gr.Tabs(selected="result"),
-                            None,
-                            tabs
-                        ).then(
-                            utils.generate_audio,
-                            [source, source_component],
-                            [download_audio],
-                            show_progress="minimal"
-                        ).then(
-                            transcribe.transcribe,
-                            [download_audio],
-                            [preliminary_transcript],
-                            show_progress="hidden"
-                        )
-                with gr.Column():
-                    gr.Dropdown(label="Languages", choices=["(Autodetect)", "English"], value="(Autodetect)")
-                    gr.CheckboxGroup(label="Cleanup Transcript with LLM", choices=["Remove typos", "Separate into paragraphs"])
-                    gr.Checkbox(label="Diarize Speakers (coming soon)", interactive=False)
-            transcribe_btn = gr.Button("Transcribe audio ✨", variant="primary")
-            source.change(utils.transcribe_button, source, transcribe_btn)
-        with gr.Tab("Result", id="result"):
-            with gr.Row():
-                with gr.Column():
-                    download_audio = gr.DownloadButton("Downloading Audio File (please wait...)", variant="primary", interactive=False, size="sm")
-                    preliminary_transcript = gr.Textbox(info="Preliminary transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
-                with gr.Column():
-                    gr.Markdown("*Final transcript will appear here*")
 demo.launch()

 with gr.Blocks(theme="base") as demo:
     gr.Markdown("<center><h1> 🔊 Transcription Delight </h1></center>")
+    gr.Markdown("### Step 1: Generate Raw Transcript")
+    with gr.Row():
+        with gr.Column():
+            source = gr.Radio(label="Source type", choices=[("Audio", "audio"), ("Video", "video"), ("YouTube URL", "youtube")], value="audio")
+            @gr.render(inputs=source)
+            def show_source(s):
+                if s == "audio":
+                    source_component = gr.Audio(type="filepath")
+                elif s == "video":
+                    source_component = gr.Video()
+                else:
+                    source_component = gr.Textbox(placeholder="https://www.youtube.com/watch?v=44vi31hehw4")
+                    preview = gr.HTML(label="Video preview")
+                    source_component.change(utils.convert_to_embed_url, source_component, preview)
+                # transcribe_btn.click(
+                #     lambda : gr.Tabs(selected="result"),
+                #     None,
+                #     tabs
+                # ).then(
+                #     utils.generate_audio,
+                #     [source, source_component],
+                #     [download_audio],
+                #     show_progress="minimal"
+                # ).then(
+                #     transcribe.transcribe,
+                #     [download_audio],
+                #     [preliminary_transcript],
+                #     show_progress="hidden"
+                # )
+        with gr.Column():
+            transcribe_btn = gr.Button("Transcribe audio 📜", variant="primary")
+            preliminary_transcript = gr.Textbox(info="Raw transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
+    source.change(utils.transcribe_button, source, transcribe_btn)
+    gr.Markdown("### Step 2: Clean with an LLM")
+    with gr.Row():
+        with gr.Column():
+            cleanup_options = gr.CheckboxGroup(label="Cleanup Transcript with LLM", choices=["Remove typos", "Separate into paragraphs"])
+            llm_prompt = gr.Textbox(label="LLM Prompt", visible=False, lines=3)
+            cleanup_options.change(
+                utils.generate_prompt,
+                cleanup_options,
+                llm_prompt
+            )
+        with gr.Column():
+            clean_btn = gr.Button("Clean transcript ✨", variant="primary", interactive=False)
+            gr.Markdown("*Final transcript will appear here*")
+        # with gr.Tab("Result", id="result"):
+        #     with gr.Row():
+        #         with gr.Column():
+        #             download_audio = gr.DownloadButton("Downloading Audio File (please wait...)", variant="primary", interactive=False, size="sm")
+        #             preliminary_transcript = gr.Textbox(info="Raw transcript", lines=10, show_copy_button=True, show_label=False, interactive=False)
+        #         with gr.Column():
 demo.launch()

clean.py ADDED Viewed

	@@ -0,0 +1,12 @@

+from huggingface_hub import InferenceClient
+MODEL_NAME = "meta-llama/Meta-Llama-3-70b-Instruct"
+def clean_transcript(prompt, transcript):
+    """Ask the chat model to clean up a raw transcript and return the result.
+
+    Args:
+        prompt: Instruction text telling the model how to clean the transcript.
+        transcript: The raw transcript text to be cleaned.
+
+    Returns:
+        The concatenation of all text tokens streamed back by the model.
+        Tokens are also echoed to stdout as they arrive.
+    """
+    # Send the instruction and the transcript together as a single user turn;
+    # previously an undefined name (PROMPT) was used and both arguments were
+    # ignored, which raised NameError on the first call.
+    messages = [
+        {"role": "user", "content": f"{prompt}\n\n{transcript}"}
+    ]
+    client = InferenceClient(model=MODEL_NAME)
+    pieces = []
+    # NOTE(review): max_tokens=200 will likely truncate the output for long
+    # transcripts -- confirm whether this cap is intentional.
+    for c in client.chat_completion(messages, max_tokens=200, stream=True):
+        token = c.choices[0].delta.content
+        # Skip chunks that carry no text so "None" is never printed or joined.
+        if not token:
+            continue
+        print(token, end="")
+        pieces.append(token)
+    return "".join(pieces)

transcribe.py CHANGED Viewed

@@ -25,3 +25,4 @@ def transcribe_audio_in_chunks(audio_path, chunk_length_ms):
         transcription = transcribe_segment(chunk, i)
         yield transcription


25	transcription = transcribe_segment(chunk, i)
26	yield transcription
27
28	+

utils.py CHANGED Viewed

@@ -43,9 +43,9 @@ def convert_video_to_audio(input_file):
 def transcribe_button(source):
     if source == "audio":
-        return gr.Button("Transcribe audio ✨")
     else:
-        return gr.Button("Transcribe video ✨")
 def generate_audio(source, source_file):
     if source == "audio":
@@ -57,3 +57,14 @@ def generate_audio(source, source_file):
         gr.Info("Downloading audio from YouTube...")
         audio_file = download_audio_from_youtube(source_file)
     return gr.DownloadButton("Downloading Audio File", value=audio_file, interactive=True)

 def transcribe_button(source):
     if source == "audio":
+        return gr.Button("Transcribe audio 📜")
     else:
+        return gr.Button("Transcribe video 📜")
 def generate_audio(source, source_file):
     if source == "audio":
         gr.Info("Downloading audio from YouTube...")
         audio_file = download_audio_from_youtube(source_file)
     return gr.DownloadButton("Downloading Audio File", value=audio_file, interactive=True)
+def generate_prompt(cleanup):
+    """Build the LLM prompt textbox update for the selected cleanup options.
+
+    Args:
+        cleanup: The selected cleanup option labels (may be empty or None).
+
+    Returns:
+        A ``gr.Textbox`` update: hidden when no known option is selected,
+        otherwise visible with a placeholder describing the requested cleanup.
+    """
+    # Use membership tests instead of comparing against exact lists: the
+    # options may arrive in selection order, and the old list equality fell
+    # through (returning None) for ["Separate into paragraphs", "Remove typos"].
+    selected = set(cleanup or ())
+    fix_typos = "Remove typos" in selected
+    split_paragraphs = "Separate into paragraphs" in selected
+    if fix_typos and split_paragraphs:
+        return gr.Textbox(visible=True, placeholder="The following is a raw transcript from an automatic transcription system. Remove the typos and separate the transcript into paragraphs based on logical breaks.")
+    if fix_typos:
+        return gr.Textbox(visible=True, placeholder="The following is a raw transcript from an automatic transcription system. Remove the typos from the transcript.")
+    if split_paragraphs:
+        return gr.Textbox(visible=True, placeholder="The following is a raw transcript from an automatic transcription system. Separate the transcript into paragraphs based on logical breaks.")
+    # Nothing (or nothing recognized) selected: keep the prompt box hidden.
+    return gr.Textbox(visible=False)