# Import the Gradio library for creating web interfaces
import gradio as gr
# Import the pipeline function from transformers for loading pre-trained models
from transformers import pipeline
# Import numpy for numerical operations
import numpy as np

# Initialize the automatic speech recognition pipeline using the Whisper base English model
transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
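# Note: on first run, the pipeline downloads the model weights from the Hugging Face Hub and caches them locally.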

# Define the transcription function: takes the accumulated audio stream and a new chunk,
# and returns the updated stream along with the transcribed text
def transcribe(stream, new_chunk):
    # Unpack the audio tuple into sample rate (sr) and audio data (y)
    sr, y = new_chunk
    # Convert the audio data to 32-bit float
    y = y.astype(np.float32)
    # Normalize the audio data to be between -1 and 1 (skip all-zero chunks to avoid dividing by zero)
    max_val = np.max(np.abs(y))
    if max_val > 0:
        y /= max_val

    # Append the new chunk to the audio accumulated so far, or start the stream with it
    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y
    # Transcribe the full accumulated audio and return both the updated stream state and the text
    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]
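
# Gradio feeds the first output ("state") back in as the first input on the next call,
# so `stream` accumulates every chunk and the transcription always covers all audio so far.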

# Create a Gradio interface for the transcribe function
demo = gr.Interface(
    # Specify the function to run
    transcribe,
    # Inputs: the accumulated audio state plus a streaming microphone recorder
    ["state", gr.Audio(sources=["microphone"], streaming=True)],
    # Outputs: the updated state plus the transcribed text
    ["state", "text"],
    # Re-run the function automatically whenever a new audio chunk arrives
    live=True,
)

demo.launch()
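# launch() serves the app locally (http://127.0.0.1:7860 by default); pass share=True for a temporary public link.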