from transformers import pipeline
from sessions import sessions
import torchaudio
import torchaudio.transforms as T
import gradio as gr

# Speech-emotion-recognition model fine-tuned on RAVDESS.
pipe = pipeline(
    "audio-classification",
    model="BilalHasan/distilhubert-finetuned-ravdess",
)

TARGET_SR = 16000           # sampling rate the model expects
CHUNK_LEN = 30 * TARGET_SR  # 30-second chunks


def split_audio(array, audio_batch=None):
    """Split a 16 kHz waveform into 30-second chunks with 50% overlap."""
    if audio_batch is None:
        audio_batch = []
    arr1, arr2 = array[:CHUNK_LEN], array[CHUNK_LEN // 2:]
    audio_batch.append(arr1)
    if len(arr2) > CHUNK_LEN:
        split_audio(arr2, audio_batch)
    elif len(arr2) > 0:
        audio_batch.append(arr2)
    return audio_batch


def prediction(path):
    # Load the recording and resample it to the model's 16 kHz rate.
    array, sr = torchaudio.load(path)
    resampler = T.Resample(sr, TARGET_SR)
    resampled_audio = resampler(array)

    # Classify each chunk (first channel only) and take a majority vote.
    audio_batch = split_audio(resampled_audio[0].numpy())
    predictions = [pipe(chunk)[0]["label"] for chunk in audio_batch]
    mood = max(set(predictions), key=predictions.count)

    # Moods without a dedicated session are grouped under 'other'.
    if mood in ["neutral", "calm", "happy", "surprised"]:
        mood = "other"
    session = sessions.mood2session[mood]
    return mood, session


demo = gr.Interface(
    fn=prediction,
    inputs=[gr.Audio(type="filepath")],
    outputs=[gr.Textbox(label="Mood"), gr.Textbox(label="Recommended Yoga Session")],
)

demo.launch()
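
# Note: the `sessions` module imported above is not shown here; it is assumed
# to expose a `mood2session` mapping with one entry per mood this app can
# return. A minimal hypothetical stand-in for local testing is sketched below;
# the keys are inferred from the standard RAVDESS label set minus the labels
# folded into 'other', and the values are placeholders, not the real
# recommendations.
#
#   # sessions.py (hypothetical stand-in, assumed structure)
#   from types import SimpleNamespace
#
#   sessions = SimpleNamespace(
#       mood2session={
#           "sad": "<session for sad>",
#           "angry": "<session for angry>",
#           "fearful": "<session for fearful>",
#           "disgust": "<session for disgust>",
#           "other": "<general session>",
#       }
#   )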