child_speech / app.py
colerobertson's picture
Upload folder using huggingface_hub
5b1f241 verified
raw
history blame contribute delete
766 Bytes
# Demo: zero-shot classification of a recording as adult vs. child speech.
# (Original note: test correct replication of speaker phonemes.)
from transformers import pipeline
# Build the zero-shot audio classifier once at module load; classify_audio
# reuses this single instance for every call.
pipe = pipeline(
    task="zero-shot-audio-classification",
    model="laion/clap-htsat-unfused",
)
import numpy as np
import gradio as gr
def get_labels(target):
    """Return the candidate text prompts for zero-shot classification.

    Args:
        target: Currently unused; the same two prompts are returned
            regardless of its value.

    Returns:
        A new two-element list: the adult prompt followed by the child prompt.
    """
    # Plain string literals: the prompts contain no placeholders, so the
    # f-string prefix was unnecessary.
    return ["An adult speaking.", "A child speaking."]
def classify_audio(audio, target=None):
    """Score a recording against the adult/child speech prompts.

    Args:
        audio: ``(sample_rate, samples)`` pair, or ``None`` when nothing was
            recorded. ``samples`` is a NumPy array; a 2-D array is treated as
            ``(samples, channels)`` -- NOTE(review): confirm this layout
            against the Gradio version in use.
        target: Forwarded unchanged to ``get_labels``.

    Returns:
        Dict mapping each candidate label to its score, or an empty dict when
        there is no audio to classify.
    """
    # Submitting without a recording yields None; unpacking it would raise.
    if audio is None:
        return {}

    # NOTE(review): the sample rate is not passed to the pipeline, so the
    # model presumably treats the samples as already being at its expected
    # rate -- confirm whether resampling is needed.
    _sample_rate, samples = audio
    samples = np.asarray(samples).astype(np.float32)

    # np.max raises on an empty array, so bail out before normalising.
    if samples.size == 0:
        return {}

    # The pipeline takes a single-channel waveform; average the channels.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Peak-normalise to [-1, 1]. A silent clip has peak 0; dividing by it
    # would fill the waveform with NaNs, so leave it as-is in that case.
    peak = np.max(np.abs(samples))
    if peak > 0:
        samples /= peak

    candidate_labels = get_labels(target)
    preds = pipe(samples, candidate_labels=candidate_labels)
    return {p["label"]: p["score"] for p in preds}
# Web UI: record from the microphone, run classify_audio, and show the
# per-label scores.
demo = gr.Interface(
    fn=classify_audio,
    inputs=[gr.Audio(source="microphone")],
    # gr.Label is the supported spelling of the deprecated gr.outputs.Label.
    outputs=gr.Label(),
)
demo.launch(debug=False)