alex buz committed
Commit 1cd886e
1 Parent(s): da77537
Files changed (7)
  1. app copy 2.py +39 -52
  2. app copy 3.py +0 -53
  3. app copy 4.py +0 -64
  4. app copy 5.py +0 -59
  5. app copy 6.py +0 -47
  6. app copy.py +14 -49
  7. requirements.txt +84 -0
app copy 2.py CHANGED
@@ -1,57 +1,44 @@
  import gradio as gr
- import speech_recognition as sr
- import os
-
- def transcribe_audio(file_path):
-     """Transcribes audio to text using the speech_recognition library."""
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(file_path) as source:
-         audio_data = recognizer.record(source)
-     try:
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def handle_transcription(file_info):
-     """Handle transcription after recording."""
-     if file_info is None:
-         return f" 111 No audio recorded or file not found: {file_info}"
-     print(file_info)
-     file_path = file_info
-     if os.path.exists(file_path):
-         return transcribe_audio(file_path)
-     return f"222 No audio recorded or file not found: {file_info}"

  with gr.Blocks() as demo:
-     gr.Markdown("### Voice Recorder and Transcriber")
-     audio_box = gr.Audio(label="Record Audio", sources="microphone", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         record_btn = gr.Button('Record/Stop')
-         transcribe_btn = gr.Button('Transcribe')
-
-     output_text = gr.Textbox(label="Transcription Output")
-
-     def manage_record(recording_state):
-         """Toggle recording and manage UI updates."""
-         return not recording_state, "Stop" if not recording_state else "Record"
-
-     state = gr.State(False)  # False indicates not recording, True indicates recording
-
-     record_btn.click(
-         fn=manage_record,
-         inputs=state,
-         outputs=[state, record_btn],
-         js="document.getElementById('audio').value = null; document.getElementById('audio').click();"
-     )
-
-     transcribe_btn.click(
-         fn=handle_transcription,
-         inputs=audio_box,
-         outputs=output_text
      )

- demo.launch(debug=True)
 
  import gradio as gr
+ from transformers import pipeline
+ import numpy as np
+
+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
+ qa_model = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
+
+ def transcribe(audio):
+     if audio is None:
+         return "No audio recorded."
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+ def answer(transcription):
+     # This is a placeholder. In a real scenario, you'd have a predefined context or retrieve it based on the transcription.
+     context = "Gradio is a Python library for building machine learning web apps. It was created to make it easy for machine learning developers to demo their work."
+
+     result = qa_model(question=transcription, context=context)
+     return result['answer']
+
+ def process_audio(audio):
+     transcription = transcribe(audio)
+     answer_result = answer(transcription)
+     return transcription, answer_result

  with gr.Blocks() as demo:
+     gr.Markdown("# Audio Transcription and Question Answering")
+
+     audio_input = gr.Audio(label="Audio Input", sources=["microphone"])
+     transcription_output = gr.Textbox(label="Transcription")
+     answer_output = gr.Textbox(label="Answer Result")
+
+     submit_button = gr.Button("Submit")
+
+     submit_button.click(
+         fn=process_audio,
+         inputs=[audio_input],
+         outputs=[transcription_output, answer_output]
      )

+ demo.launch()
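
A note on the new transcribe helper in app copy 2.py: the peak normalization y /= np.max(np.abs(y)) divides by zero on an all-silent clip, and gr.Audio can return a 2-D stereo array while the Whisper pipeline expects mono. A minimal sketch of a safer variant, not part of this commit (normalize_audio is a hypothetical helper name):

import numpy as np

def normalize_audio(y: np.ndarray) -> np.ndarray:
    """Peak-normalize a waveform, skipping the division when the clip is silent."""
    y = y.astype(np.float32)
    if y.ndim > 1:
        y = y.mean(axis=1)  # assumed downmix: stereo to mono for Whisper
    peak = np.max(np.abs(y))
    return y / peak if peak > 0 else y

Inside transcribe, the two normalization lines would then become y = normalize_audio(y).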
app copy 3.py DELETED
@@ -1,53 +0,0 @@
- import gradio as gr
- import speech_recognition as sr
- import os
-
- def transcribe_audio(file_path):
-     """Transcribes audio to text using the speech_recognition library."""
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(file_path) as source:
-         audio_data = recognizer.record(source)
-     try:
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def handle_transcription(file_info):
-     """Handle transcription after recording."""
-     if file_info is None:
-         return "No audio recorded or file not found."
-     file_path = file_info
-     if os.path.exists(file_path):
-         return transcribe_audio(file_path)
-     return "No audio recorded or file not found."
-
- with gr.Blocks() as demo:
-     gr.Markdown("### Voice Recorder and Transcriber")
-     audio_box = gr.Audio(label="Record Audio", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         record_btn = gr.Button('Record')
-         transcribe_btn = gr.Button('Transcribe')
-
-     output_text = gr.Textbox(label="Transcription Output")
-
-     def toggle_record(button_text):
-         """Toggle the button text and manage the recording."""
-         return "Stop" if button_text == "Record" else "Record"
-
-     record_btn.click(
-         fn=toggle_record,
-         inputs=record_btn,
-         outputs=record_btn
-     )
-
-     transcribe_btn.click(
-         fn=handle_transcription,
-         inputs=audio_box,
-         outputs=output_text
-     )
-
- demo.launch(debug=True)
app copy 4.py DELETED
@@ -1,64 +0,0 @@
- import gradio as gr
- import speech_recognition as sr
- import os
-
- def transcribe_audio(file_path):
-     """Transcribes audio to text using the speech_recognition library."""
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(file_path) as source:
-         audio_data = recognizer.record(source)
-     try:
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def handle_transcription(file_info):
-     """Handle transcription after recording."""
-     if file_info is None:
-         return "No audio recorded or file not found."
-     file_path = file_info
-     if os.path.exists(file_path):
-         return transcribe_audio(file_path)
-     return "No audio recorded or file not found."
-
- with gr.Blocks() as demo:
-     gr.Markdown("### Voice Recorder and Transcriber")
-     audio_box = gr.Audio(label="Record Audio", sources="microphone", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         record_btn = gr.Button('Record')
-         transcribe_btn = gr.Button('Transcribe')
-
-     output_text = gr.Textbox(label="Transcription Output")
-
-     def create_toggle_record(record_btn):
-         print(111)
-         def toggle_record(button_text):
-             if button_text == "Record":
-                 print(222)
-                 print(audio_box)
-                 audio_box.start_recording()
-                 return "Stop"  # Return new button text (Stop)
-             else:
-                 audio_box.stop_recording()
-                 return "Record"  # Return new button text (Record)
-         return toggle_record
-
-     # Create the closure and connect it to the button click
-     toggle_record_fn = create_toggle_record(record_btn)
-     record_btn.click(
-         fn=toggle_record_fn,
-         inputs=[record_btn],  # Pass only the button (no need for text)
-         outputs=record_btn
-     )
-
-     transcribe_btn.click(
-         fn=handle_transcription,
-         inputs=audio_box,
-         outputs=output_text
-     )
-
- demo.launch(debug=True)
app copy 5.py DELETED
@@ -1,59 +0,0 @@
- import gradio as gr
- import time
- import speech_recognition as sr
-
- def transcribe(audio):
-     if audio is None:
-         return "No audio recorded."
-
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(audio) as source:
-         audio_data = recognizer.record(source)
-     try:
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def toggle_recording(audio, state):
-     if state == "Idle":
-         return None, "Recording", "Recording... Click 'Stop' when finished."
-     else:
-         time.sleep(1)  # Small delay to ensure audio is processed
-         if audio is not None:
-             transcription = transcribe(audio)
-             return None, "Idle", transcription
-         else:
-             return None, "Idle", "No audio recorded."
-
- with gr.Blocks() as demo:
-     audio = gr.Audio(sources="microphone", type="filepath", elem_id="audio-component")
-     button = gr.Button("Record", elem_id="record-button")
-     state = gr.State("Idle")
-     output = gr.Textbox(label="Transcription")
-
-     button.click(
-         fn=toggle_recording,
-         inputs=[audio, state],
-         outputs=[audio, state, output],
-         js="""
-         async (audio, state) => {
-             const audioEl = document.querySelector('#audio-component audio');
-             const recordButton = document.querySelector('#record-button');
-
-             if (state === "Idle") {
-                 await audio.startRecording();
-                 recordButton.textContent = "Stop";
-                 return [null, "Recording", "Recording... Click 'Stop' when finished."];
-             } else {
-                 await audio.stopRecording();
-                 recordButton.textContent = "Record";
-                 return [await audio.getValue(), "Idle", "Processing..."];
-             }
-         }
-         """
-     )
-
- demo.queue().launch(debug=True)
app copy 6.py DELETED
@@ -1,47 +0,0 @@
- import gradio as gr
-
- def click_js():
-     return """
-     function(audio_btn, update_status) {
-         const recordBtn = document.querySelector('#audio button');
-         if (audio_btn == 'Speak') {
-             recordBtn.click(); // Start recording
-             update_status('Stop'); // Update the button to show 'Stop'
-             return 'Recording...';
-         } else {
-             recordBtn.click(); // Stop recording
-             update_status('Speak'); // Reset button text
-             return new Promise(resolve => {
-                 setTimeout(() => { // Wait a small delay to ensure recording has stopped
-                     resolve('Done recording');
-                 }, 500);
-             });
-         }
-     }
-     """
-
- def transcribe(recording_status):
-     if recording_status == 'Done recording':
-         print('Transcribing...')
-         return 'Success'
-     else:
-         return recording_status
-
- with gr.Blocks() as demo:
-     msg = gr.Textbox()
-     audio_box = gr.Audio(label="Audio", sources="microphone", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         audio_btn = gr.Button('Speak')
-         clear = gr.Button("Clear")
-
-     audio_btn.click(
-         js=click_js(),
-         inputs=[audio_btn],
-         outputs=[audio_btn, msg],
-         fn=transcribe
-     )
-
-     clear.click(lambda: "", inputs=None, outputs=msg, queue=False)
-
- demo.launch(debug=True)
app copy.py CHANGED
@@ -1,56 +1,21 @@
  import gradio as gr
- import speech_recognition as sr
- from pprint import pprint as pp
- import os
-
- def transcribe_audio(file_path):
-     """Transcribes audio to text using the speech_recognition library."""
-     recognizer = sr.Recognizer()
-     with sr.AudioFile(file_path) as source:
-         audio_data = recognizer.record(source)
-     try:
-         # Using Google's speech recognition service. Note: It requires internet.
-         text = recognizer.recognize_google(audio_data)
-         return text
-     except sr.UnknownValueError:
-         return "Google Speech Recognition could not understand audio"
-     except sr.RequestError as e:
-         return f"Could not request results from Google Speech Recognition service; {e}"
-
- def manage_record():
-     """Toggle recording and manage UI updates."""
-     js_code = """
-     const btn = document.getElementById('record_btn');
-     const recordingText = 'Stop';
-     const idleText = 'Record';
-     if (btn.textContent.includes(idleText)) {
-         btn.textContent = recordingText;
-     } else {
-         btn.textContent = idleText;
-     }
-     """
-     return gr.update(js=js_code)
-
- def handle_transcription(file_info):
-     """Handle transcription after recording."""
-     print(file_info)
-     file_path = file_info
-     print(file_path)
-     if os.path.exists(file_path):
-         return transcribe_audio(file_path)
-     return "No audio recorded or file not found."
-
- with gr.Blocks() as demo:
-     gr.Markdown("### Voice Recorder and Transcriber")
-     audio_box = gr.Audio(label="Record Audio", sources="microphone", type="filepath", elem_id='audio')
-
-     with gr.Row():
-         record_btn = gr.Button('Record/Stop', elem_id='record_btn')
-         transcribe_btn = gr.Button('Transcribe')
-
-     output_text = gr.Textbox(label="Transcription Output")
-
-     record_btn.click(fn=manage_record)
-     transcribe_btn.click(fn=handle_transcription, inputs=audio_box, outputs=output_text)
-
- demo.launch(debug=True)

  import gradio as gr
+ from transformers import pipeline
+ import numpy as np

+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

+ def transcribe(audio):
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))
+
+     return transcriber({"sampling_rate": sr, "raw": y})["text"]

+ demo = gr.Interface(
+     transcribe,
+     gr.Audio(sources=["microphone"]),
+     "text",
+ )

+ demo.launch()
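
The slimmed-down app copy.py can be smoke-tested without a microphone by handing transcribe the same (sample_rate, ndarray) tuple that gr.Audio delivers with its default type="numpy". A rough sketch, assuming transcribe is in scope as defined above (the 440 Hz tone is arbitrary test input, so the resulting text is meaningless):

import numpy as np

sr = 16000  # assumed sample rate in Hz
t = np.linspace(0, 1, sr, endpoint=False)
clip = (sr, (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32))
print(transcribe(clip))  # exercises the normalization and Whisper pipeline path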
requirements.txt ADDED
@@ -0,0 +1,84 @@
+ aiofiles==23.2.1
+ altair==5.3.0
+ annotated-types==0.7.0
+ anyio==4.4.0
+ attrs==23.2.0
+ certifi==2024.7.4
+ charset-normalizer==3.3.2
+ click==8.1.7
+ colorama==0.4.6
+ contourpy==1.2.1
+ cycler==0.12.1
+ dnspython==2.6.1
+ email_validator==2.2.0
+ fastapi==0.111.1
+ fastapi-cli==0.0.4
+ ffmpy==0.3.2
+ filelock==3.15.4
+ fonttools==4.53.1
+ fsspec==2024.6.1
+ gradio==4.29.0
+ gradio_client==0.16.1
+ h11==0.14.0
+ httpcore==1.0.5
+ httptools==0.6.1
+ httpx==0.27.0
+ huggingface-hub==0.23.5
+ idna==3.7
+ importlib_resources==6.4.0
+ intel-openmp==2021.4.0
+ Jinja2==3.1.4
+ jsonschema==4.23.0
+ jsonschema-specifications==2023.12.1
+ kiwisolver==1.4.5
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib==3.9.1
+ mdurl==0.1.2
+ mkl==2021.4.0
+ mpmath==1.3.0
+ networkx==3.3
+ numpy==1.26.4
+ orjson==3.10.6
+ packaging==24.1
+ pandas==2.2.2
+ pillow==10.4.0
+ pydantic==2.8.2
+ pydantic_core==2.20.1
+ pydub==0.25.1
+ Pygments==2.18.0
+ pyparsing==3.1.2
+ python-dateutil==2.9.0.post0
+ python-dotenv==1.0.1
+ python-multipart==0.0.9
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.35.1
+ regex==2024.5.15
+ requests==2.32.3
+ rich==13.7.1
+ rpds-py==0.19.0
+ ruff==0.5.2
+ safetensors==0.4.3
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.1
+ SpeechRecognition==3.10.4
+ starlette==0.37.2
+ sympy==1.13.0
+ tbb==2021.13.0
+ tokenizers==0.19.1
+ tomlkit==0.12.0
+ toolz==0.12.1
+ torch==2.3.1
+ torchaudio==2.3.1
+ tqdm==4.66.4
+ transformers==4.42.4
+ typer==0.12.3
+ typing_extensions==4.12.2
+ tzdata==2024.1
+ urllib3==2.2.2
+ uvicorn==0.30.1
+ watchfiles==0.22.0
+ websockets==11.0.3
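
The pins above cover both pipelines used by app copy 2.py (transformers==4.42.4 with torch==2.3.1; SpeechRecognition==3.10.4 stays pinned even though all speech_recognition code paths were deleted in this commit). A quick environment check, not part of the commit, that loads each model once (weights download on first run):

from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
qa = pipeline("question-answering", model="distilbert-base-cased-distilled-squad")
print(qa(question="What is Gradio?",
         context="Gradio is a Python library for building machine learning web apps.")["answer"])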