import streamlit as st
import torch
import librosa
import spacy
import spacy_streamlit
import nltk
from nltk import tokenize
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    HubertForCTC,
    Wav2Vec2Processor,
    pipeline,
)

# Sentence tokenizer used by the Translate task below.
nltk.download('punkt')
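# NOTE: spacy.load('en_core_web_sm') further down assumes the model package is
# already installed in the environment, e.g. via
# `python -m spacy download en_core_web_sm` in the Space's setup step.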
st.title('Audio-to-Text')

audio_file = st.file_uploader('Upload Audio', type=['wav', 'mp3', 'm4a'])

st.subheader('Please select any of the NLP tasks')


def transcribe(audio_file):
    """Transcribe an uploaded audio file with a HuBERT CTC model."""
    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
    model = HubertForCTC.from_pretrained("facebook/hubert-large-ls960-ft")
    # Resample to the 16 kHz rate the acoustic model was trained on.
    speech, rate = librosa.load(audio_file, sr=16000)
    input_values = processor(speech, return_tensors="pt", padding="longest", sampling_rate=rate).input_values
    # Greedy CTC decoding: take the most likely token at each frame.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    text = processor.batch_decode(predicted_ids)
    return ' '.join(str(sentence) for sentence in text)


if st.button('Audio Transcription'):
    if audio_file is not None:
        st.markdown(transcribe(audio_file))
    else:
        st.error('Please upload an audio file')
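# NOTE: the CTC decode returns uppercase text with no punctuation, so the
# sentence splitting and text models in the tasks below work on fairly raw input.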
if st.button('Summarize'):
    if audio_file is not None:
        result = transcribe(audio_file)
        # Summarize the transcript with BART fine-tuned on CNN/DailyMail.
        summarizer = pipeline("summarization", model='facebook/bart-large-cnn')
        st.markdown(summarizer(result)[0]['summary_text'])
    else:
        st.error('Please upload an audio file')
if st.button('Sentiment Analysis'):
    if audio_file is not None:
        result = transcribe(audio_file)
        # Default sentiment pipeline (DistilBERT fine-tuned on SST-2).
        nlp_sa = pipeline("sentiment-analysis")
        st.write(nlp_sa(result))
    else:
        st.error('Please upload an audio file')
if st.button('Audio Classification'):
    if audio_file is not None:
        # Emotion recognition runs on the uploaded waveform itself (no
        # transcription needed), using a wav2vec2 model fine-tuned on the
        # SUPERB emotion-recognition task.
        speech, rate = librosa.load(audio_file, sr=16000)
        classifier = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")
        labels = classifier(speech, top_k=5)
        st.write(labels)
    else:
        st.error('Please upload an audio file')
if st.button('Named Entity Recognition'):
    if audio_file is not None:
        result = transcribe(audio_file)
        # Tag entities with spaCy and render them with spacy-streamlit.
        nlp = spacy.load('en_core_web_sm')
        doc = nlp(result)
        spacy_streamlit.visualize_ner(doc, labels=nlp.get_pipe("ner").labels, title="List of Entities")
    else:
        st.error('Please upload an audio file')
# T5 treats translation as text-to-text generation driven by a task prefix.
tokenizer = AutoTokenizer.from_pretrained("t5-base")


def load_model():
    model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
    return model


model1 = load_model()

st.subheader('Select your source and target language below.')
source_lang = st.selectbox("Source language", ['English'])
target_lang = st.selectbox("Target language", ['German', 'French'])
if st.button('Translate'):
    if audio_file is not None:
        result = transcribe(audio_file)
        # T5 expects a prefix of the form "translate English to German: <text>".
        prefix = 'translate ' + str(source_lang) + ' to ' + str(target_lang) + ': '
        # Translate sentence by sentence to stay within the model's input length.
        sentences = tokenize.sent_tokenize(result)
        encoded = tokenizer([prefix + sentence for sentence in sentences], padding=True, return_tensors="pt")
        translated_ids = model1.generate(encoded["input_ids"], attention_mask=encoded['attention_mask'], max_length=512)
        translated = tokenizer.batch_decode(translated_ids, skip_special_tokens=True)
        st.subheader('Translated Text')
        st.write(' '.join(translated))
    else:
        st.error('Please upload an audio file')
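# A requirements.txt along these lines is assumed for the Space (versions
# unpinned here; pin as needed for reproducibility):
#
#   streamlit
#   torch
#   librosa
#   soundfile
#   transformers
#   spacy
#   spacy-streamlit
#   nltk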