import nltk
import librosa
import torch
import gradio as gr
import scipy.io.wavfile
import soundfile as sf
from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperTokenizer
from transformers import pipeline
from huggingface_hub import HfApi, CommitOperationAdd, CommitOperationDelete

# Download the NLTK sentence tokenizer data
nltk.download("punkt")

model_name = "Shubham09/whisper31filescheck"
processor = WhisperProcessor.from_pretrained(model_name, task="transcribe")
# tokenizer = WhisperTokenizer.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)
def load_data(input_file):
    # Read the audio file (librosa returns a mono float32 signal by default)
    speech, sample_rate = librosa.load(input_file)
    # Fallback: downmix to mono by averaging the channels
    if len(speech.shape) > 1:
        speech = speech.mean(axis=0)
    # Resample the audio to 16 kHz, the rate the Whisper feature extractor expects
    if sample_rate != 16000:
        speech = librosa.resample(speech, orig_sr=sample_rate, target_sr=16000)
    return speech
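# Example usage (Actuator.wav is one of the sample files shipped with the app):
# speech = load_data("Actuator.wav")  # 1-D float32 waveform at 16 kHz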
# Earlier, commented-out experiments with saving the recording and uploading
# it to the Hub:
# def write_to_file(input_file):
#     fs = 16000
#     sf.write("my_Audio_file.flac", input_file, fs)
#     api = HfApi()
#     operations = [
#         CommitOperationAdd(path_in_repo="my_Audio_file.flac", path_or_fileobj="Shubham09/whisper31filescheck/repo/my_Audio_file.flac"),
#         # CommitOperationAdd(path_in_repo="weights.h5", path_or_fileobj="~/repo/weights-final.h5"),
#         # CommitOperationDelete(path_in_repo="old-weights.h5"),
#         # CommitOperationDelete(path_in_repo="logs/"),
#     ]
#     # scipy.io.wavfile.write("microphone-result.wav")
#     with open("microphone-results.wav", "wb") as f:
#         f.write(input_file.get_wav_data())
#     import base64
#     wav_file = open("temp.wav", "wb")
#     decode_string = base64.b64decode(input_file)
#     wav_file.write(decode_string)
pipe = pipeline(model="Shubham09/whisper31filescheck") # change to "your-username/the-name-you-picked"
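# Note: for recordings longer than Whisper's 30-second window, the ASR
# pipeline accepts a chunk_length_s argument (a standard transformers
# pipeline option, not used in the original app), e.g.:
# pipe = pipeline("automatic-speech-recognition",
#                 model="Shubham09/whisper31filescheck", chunk_length_s=30)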
def asr_transcript(input_file):
    # Transcribe the recorded audio file via the ASR pipeline
    # audio = "Shubham09/whisper31filescheck/repo/my_Audio_file.flac"
    text = pipe(input_file)["text"]
    return text
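# Example usage with one of the sample files bundled with the app:
# print(asr_transcript("Actuator.wav"))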
# Earlier, commented-out attempt at running the model directly instead of
# through the pipeline:
#     speech = load_data(input_file)
#     # Tokenize
#     input_features = processor(speech).input_features  # , padding="longest", return_tensors="pt"
#     # input_values = tokenizer(speech, return_tensors="pt").input_values
#     # Take logits
#     logits = model(input_features).logits
#     # Take argmax
#     predicted_ids = torch.argmax(logits, dim=-1)
#     # Get the words from predicted word ids
#     transcription = processor.batch_decode(predicted_ids)
#     # Correct the letter casing
#     # transcription = correct_casing(transcription.lower())
#     return transcription
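# A sketch (not part of the original app) of that direct-model path done
# correctly: Whisper is a sequence-to-sequence model, so decoding uses
# model.generate() rather than an argmax over a single forward pass.
# The helper name transcribe_with_model is illustrative.
def transcribe_with_model(input_file):
    speech = load_data(input_file)
    # The feature extractor expects 16 kHz audio, which load_data guarantees
    input_features = processor(speech, sampling_rate=16000, return_tensors="pt").input_features
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]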
# Note: gr.inputs / gr.outputs are the legacy Gradio (pre-3.x) component API
gr.Interface(
    fn=asr_transcript,
    inputs=gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker"),
    outputs=gr.outputs.Textbox(label="Output Text"),
    title="ASR using Whisper",
    description="This application displays transcribed text for a given audio input",
    examples=[["Actuator.wav"], ["anomalies.wav"]],
    theme="grass",
).launch(share=True)