File size: 4,289 Bytes
58a9546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65d25f2
 
 
 
5e2210f
 
 
58a9546
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import streamlit as st
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle 
import joblib
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the LSTM model
lstm_model = load_model('lstm_model.h5')

# Load the Tokenizer used during training.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only ship a tokenizer.pkl that was produced by a trusted training run.
with open('tokenizer.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# Emotion label <-> class id lookup tables used to decode model output.
class_mapping = {"Happy": 0, "Sad": 1, "Calm": 2, "Anger": 3}
numerical_to_label = {v: k for k, v in class_mapping.items()}

# Load the KNN model
knn_model = joblib.load('knn_model.joblib')

# Load the dataset.
# The index is reset after dropping incomplete rows so that index labels and
# row positions coincide. Without this, the freshly built feature frame below
# (which gets a 0..n-1 index) would be aligned against the gapped labels of
# `mood_cats` by pd.concat, producing shifted rows and NaN padding.
df = pd.read_csv('df1.csv')
df = df.dropna().reset_index(drop=True)

# Preprocess for KNN
audio_feature_columns = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
       'duration_ms', 'time_signature']

audio_features = df[audio_feature_columns]
mood_cats = df[['mood_cats']]
audio_features_scaled = StandardScaler().fit_transform(audio_features)
# Carry df's index explicitly so the column-wise concat is a pure side-by-side join.
audio_features_df = pd.DataFrame(audio_features_scaled, columns=audio_feature_columns, index=df.index)
combined_features = pd.concat([mood_cats, audio_features_df], axis=1)

# Calculate similarity matrix for content-based.
# Fitting a second StandardScaler on the same data yields the same values,
# so the already scaled features are reused instead of being recomputed.
audio_features_scaled_content = audio_features_scaled
combined_features_content = combined_features
similarity_matrix = cosine_similarity(combined_features_content)

def recommend_cont(song_index, num_recommendations=5):
    """Return the songs most similar to a query song by cosine similarity.

    Args:
        song_index: Row position of the query song in ``similarity_matrix``
            (and therefore in ``df``).
        num_recommendations: Maximum number of songs to return.

    Returns:
        A copy of the matching ``df`` rows, most similar first, with an added
        ``score`` column holding the similarity to the query song.

    Raises:
        IndexError: If ``song_index`` is outside the similarity matrix.
    """
    song_similarity = np.asarray(similarity_matrix[song_index], dtype=float)
    # Normalise negative positions so the self-exclusion below still matches.
    query_pos = range(len(song_similarity))[song_index]

    # Stable descending order: ties keep ascending row order.
    ranked = np.argsort(-song_similarity, kind="stable")
    # Drop the query song explicitly. Relying on it being ranked first breaks
    # when another song ties with it (e.g. duplicated rows), in which case the
    # query itself could be recommended.
    ranked = ranked[ranked != query_pos][:max(num_recommendations, 0)]

    recommended_songs = df.iloc[ranked].copy()
    recommended_songs['score'] = song_similarity[ranked]
    return recommended_songs

def recommend_knn(query_index, n_recommendations=5):
    """Return the nearest neighbours of a query song according to the KNN model.

    The neighbour indices returned by ``knn_model`` are used as row positions
    in ``df``, so the model is assumed to have been fitted on rows in the same
    order as ``combined_features`` -- TODO confirm against the training script.

    Args:
        query_index: Row position of the query song in ``combined_features``.
        n_recommendations: Maximum number of songs to return.

    Returns:
        A copy of the matching ``df`` rows, nearest first, with an added
        ``score`` column computed as ``1 / (1 + distance)``.
    """
    query_vector = combined_features.iloc[query_index].values.reshape(1, -1)
    query_pos = range(len(combined_features))[query_index]

    # Ask for one extra neighbour: the query row is normally its own nearest
    # neighbour (distance 0) and is filtered out below, matching the behaviour
    # of the content-based recommender.
    n_neighbors = n_recommendations + 1
    n_fitted = getattr(knn_model, 'n_samples_fit_', None)
    if n_fitted is not None:
        # kneighbors raises if asked for more neighbours than fitted samples.
        n_neighbors = min(n_neighbors, n_fitted)

    distances, indices = knn_model.kneighbors(query_vector, n_neighbors=n_neighbors)
    distances = distances.flatten()
    indices = indices.flatten()

    not_query = indices != query_pos
    distances = distances[not_query][:n_recommendations]
    indices = indices[not_query][:n_recommendations]

    recommended_songs = df.iloc[indices].copy()
    recommended_songs['score'] = 1 / (1 + distances)
    return recommended_songs

def hybrid_recommendation(song_index, top_n=10):
    """Merge content-based and KNN recommendations into one ranked list.

    Both recommenders are asked for ``top_n`` candidates. Their results are
    pooled, and a song proposed by both keeps the higher of its two scores.

    Args:
        song_index: Row position of the query song.
        top_n: Number of candidates requested from each recommender and the
            maximum number of entries returned.

    Returns:
        A Series of scores indexed by the candidates' ``df`` index labels,
        sorted from highest to lowest score.
    """
    candidate_frames = [
        recommend_cont(song_index, top_n),
        recommend_knn(song_index, top_n),
    ]
    pooled = pd.concat(candidate_frames)

    # Force scores to numeric; anything unparsable becomes NaN.
    pooled['score'] = pd.to_numeric(pooled['score'], errors='coerce')

    # One entry per song: keep the best score rather than the average.
    best_per_song = pooled.groupby(pooled.index)['score'].max()
    ranked = best_per_song.sort_values(ascending=False)
    return ranked.head(top_n)

# Streamlit app
st.title('Music Recommendation and Emotion Detection')

# Emotion Detection
st.subheader('Emotion Detection from Song Lyrics')
user_input = st.text_input('Enter a Text:')
prediction = None

if st.button('Predict Emotion'):
    if user_input.strip():
        sequence = tokenizer.texts_to_sequences([user_input])
        # maxlen presumably matches the padding length used in training -- TODO confirm.
        padded_sequence = pad_sequences(sequence, maxlen=50)
        prediction = lstm_model.predict(padded_sequence)
    else:
        # Blank input would otherwise be scored as an all-padding sequence.
        st.warning('Please enter some text first.')

if prediction is not None:
    probabilities = prediction[0]
    threshold = 0.5
    confident_classes = [i for i, probability in enumerate(probabilities) if probability > threshold]
    if not confident_classes:
        # No class cleared the threshold: report the most likely one instead
        # of leaving the user without any output.
        confident_classes = [int(np.argmax(probabilities))]
    for class_id in confident_classes:
        st.write(f'Predicted Emotion: {numerical_to_label[class_id]}')

# Music Recommendation
st.subheader('Music Recommendation')
song_index_to_recommend = st.number_input('Enter song index:', min_value=0, max_value=len(df)-1, value=0)
hybrid_recs = hybrid_recommendation(song_index_to_recommend)

st.write("Hybrid Recommendations:")
if not hybrid_recs.empty:
    # hybrid_recs is a Series of scores keyed by df index label, so iterate
    # (label, score) pairs and look songs up by label rather than by position.
    for index, score in hybrid_recs.items():
        song = df.loc[index]
        st.write(f"Song Index: {index}, Title: {song['track_name']}, Artist: {song['track_artist']}, Score: {score}")
else:
    st.write("No recommendations found.")