Spaces:
Sleeping
Sleeping
File size: 21,088 Bytes
c2522bb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 |
import streamlit as st
import numpy as np
import pandas as pd
import requests
import pickle
import os
import altair as alt
import plotly.express as px
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from annotated_text import annotated_text
from utils import load_data_pickle, load_model_pickle, load_data_csv
# Render the page in Streamlit's wide layout.
st.set_page_config(layout="wide")

# ---------------------------------------------------------------------------
# Introduction: what a recommendation system is and the two main approaches.
# ---------------------------------------------------------------------------
st.markdown("# Recommendation system")
st.markdown("### What is a Recommendation System ?")

definition_text = """**Recommendation systems** are AI algorithms built to **suggest** or **recommend** **products** to consumers.
They are very common in social media platforms such as TikTok, Youtube or Instagram or e-commerce websites as they help improve and personalize a consumer's experience."""
st.info(definition_text)

methods_text = """There are two methods to build recommendation systems:
- **Content-based filtering**: Recommendations are made based on the user's own preferences
- **Collaborative filtering**: Recommendations are made based on the preferences and behavior of similar users"""
st.markdown(methods_text, unsafe_allow_html=True)

# Two blank paragraphs act as vertical spacing above the illustration.
for _ in range(2):
    st.markdown(" ")

# Centre the illustration by drawing it in the middle of three columns.
col_img = st.columns(spec=[0.2, 0.6, 0.2])[1]
with col_img:
    st.image("images/rs.png")
st.markdown(" ")

applications_text = """Common applications of Recommendation systems include:
- **E-Commerce Platforms** 🛍️: Suggest products to users based on their browsing history, purchase patterns, and preferences.
- **Streaming Services** 📽️: Recommend movies, TV shows, or songs based on users' viewing/listening history and preferences.
- **Social Media Platforms** 📱: Suggest friends, groups, or content based on users' connections, interests, and engagement history.
- **Automotive and Navigation Systems** 🗺️: Suggest optimal routes based on real-time traffic conditions, historical data, and user preferences.
"""
st.markdown(applications_text)
st.markdown(" ")

# ---------------------------------------------------------------------------
# Use-case selector: the sections below branch on the chosen label.
# ---------------------------------------------------------------------------
usecase_options = [
    "Movie recommendation system 📽️",
    "Hotel recommendation system 🛎️",
]
select_usecase = st.selectbox("**Choose a use case**", usecase_options)
st.divider()
#####################################################################################################
# MOVIE RECOMMENDATION SYSTEM #
#####################################################################################################
# Recommendation function
def recommend(movie_name, nb):
    """Return the ``nb`` nearest-neighbour movies of ``movie_name``.

    Relies on the module-level ``movies`` frame, the fitted ``model`` and the
    ``csr_data`` matrix being loaded before the call.

    Args:
        movie_name: Title to look up in ``movies["title"]``; the first match is used.
        nb: Number of movies to recommend.

    Returns:
        A tuple ``(names, posters, movie_ids)`` of three equally long lists.
        ``posters`` holds a poster URL per movie, or ``None`` when the poster
        could not be fetched.

    Raises:
        IndexError: If ``movie_name`` is not present in ``movies``.
    """
    # The index label of the title is also used as the row number in csr_data.
    movie_idx = movies[movies["title"] == movie_name].index[0]

    # Ask for one extra neighbour: the first result is discarded below
    # (presumably the query movie itself -- confirm against the fitted model).
    _, indices = model.kneighbors(csr_data[movie_idx], n_neighbors=nb + 1)

    # ravel() instead of squeeze() so a single neighbour still yields a list.
    neighbour_positions = np.asarray(indices).ravel().tolist()
    neighbours = movies.iloc[neighbour_positions[1:]]

    # Read title and id from the same row instead of searching the id back by
    # title, which picks the wrong movie when two movies share a title.
    recommend_movies_names = neighbours["title"].tolist()
    movie_ids = neighbours["movie_id"].tolist()

    recommend_posters = []
    for movie_id in movie_ids:
        try:
            recommend_posters.append(fetch_poster(movie_id))
        except (requests.RequestException, KeyError, TypeError, ValueError):
            # Best effort: a missing poster must not block the recommendation.
            recommend_posters.append(None)

    return recommend_movies_names, recommend_posters, movie_ids
# Get poster
def fetch_poster(movie_id):
    """Return the poster URL of a movie from the themoviedb.org API.

    Reads the API key from the module-level ``api_key``.

    Args:
        movie_id: Identifier of the movie in the themoviedb.org catalogue.

    Returns:
        The full ``image.tmdb.org`` URL (width 500) of the poster.

    Raises:
        requests.RequestException: On network failure, timeout or HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the response carries no usable ``poster_path``.
    """
    response = requests.get(
        f"https://api.themoviedb.org/3/movie/{movie_id}",
        params={"api_key": api_key},
        # Without a timeout a stalled connection would block the app forever.
        timeout=10,
    )
    response.raise_for_status()

    poster_path = response.json().get("poster_path")
    if not poster_path:
        # Covers both a missing key and an explicit null value.
        raise KeyError("poster_path")
    return "https://image.tmdb.org/t/p/w500/" + poster_path
if select_usecase == "Movie recommendation system 📽️":
    # Background colours for the genre tags rendered with annotated_text.
    colors = ["#8ef", "#faa", "#afa", "#fea", "#8ef", "#afa"]
    # API key read as a module-level global by fetch_poster().
    api_key = st.secrets["recommendation_system"]["key"]

    def _genre_tags(genre):
        """Turn a ", "-separated genre string into annotated_text tags.

        Colours are cycled so that a movie with more genres than colours
        keeps all of its tags instead of being truncated.
        """
        return [(g.strip(), "", colors[k % len(colors)])
                for k, g in enumerate(genre.split(", "))]

    # Load data
    path_data = r"data/movies"
    path_models = r"pretrained_models/recommendation_system"

    # NOTE: pickle.load executes arbitrary code; these files must stay trusted.
    with open(os.path.join(path_data, "movies_dict2.pkl"), "rb") as file:
        movies_dict = pickle.load(file)
    movies = pd.DataFrame(movies_dict)
    movies.drop_duplicates(inplace=True)

    with open(os.path.join(path_data, "vote_info.pkl"), "rb") as file:
        vote_info = pickle.load(file)
    vote = pd.DataFrame(vote_info)

    # Load model
    model = load_model_pickle(path_models, "model.pkl")
    with open(os.path.join(path_data, "csr_data_tf.pkl"), "rb") as file:
        csr_data = pickle.load(file)

    # Description of the use case
    st.markdown("""## Movie Recommendation System 📽️""")
    st.markdown("""This use case showcases the use of recommender systems for **movie recommendations** using **collaborative filtering**. <br>
The model recommends and ranks movies based on what users, who have also watched the chosen movie, have watched else on the platform. <br>
""", unsafe_allow_html=True)
    st.markdown(" ")

    # User selection
    # NOTE(review): the last three titles are excluded from the choices; the
    # reason is not visible in this file.
    selected_movie = st.selectbox("**Select a movie**", movies["title"].values[:-3])
    selected_nb_movies = int(
        st.selectbox("**Select a number of movies to recommend**", np.arange(2, 7), index=3)
    )

    # Show user selection on the app
    c1, c2 = st.columns([0.7, 0.3], gap="medium")
    with c1:
        # Left merge keeps movies without vote information (values become NaN).
        new_movies = movies.rename({"movie_id": "id"}, axis=1).merge(vote, on="id", how="left")
        is_selected = new_movies["title"] == selected_movie
        description = new_movies.loc[is_selected, "description"].to_list()[0]
        genre = new_movies.loc[is_selected, "genre"].to_list()[0]
        vote_ = new_movies.loc[is_selected, "vote_average"].to_list()[0]
        vote_count = new_movies.loc[is_selected, "vote_count"].to_list()[0]

        st.header(selected_movie, divider="grey")
        st.markdown(f"**Synopsis**: {description}")
        annotated_text(["**Genre(s)**: ", _genre_tags(genre)])
        st.markdown(f"**Rating**: {vote_}:star:")
        st.markdown(f"**Votes**: {vote_count}")
        st.info(f"You've selected {selected_nb_movies} movies to recommend")
        st.markdown(" ")
        recommend_button = st.button("**Recommend movies**")

    with c2:
        selected_id = movies.loc[movies["title"] == selected_movie, "movie_id"].to_list()[0]
        try:
            poster = fetch_poster(selected_id)
        except (requests.RequestException, KeyError, TypeError, ValueError):
            # Best effort: the page stays usable without the poster.
            poster = None
        if poster is not None:
            st.image(poster, width=300)

    # Run model and show results
    if recommend_button:
        st.text("Here are few Recommendations..")
        names, posters, movie_ids = recommend(selected_movie, selected_nb_movies)
        tab1, tab2 = st.tabs(["View movies", "View genres"])

        with tab1:
            cols = st.columns(selected_nb_movies)
            results = zip(cols, names, posters, movie_ids)
            for rank, (col, name, poster_url, movie_id) in enumerate(results, start=1):
                with col:
                    # NOTE(review): this expander is created but nothing is
                    # written into it; kept to preserve the current layout.
                    st.expander("See movie details")
                    if poster_url is not None:
                        st.image(poster_url)
                    st.markdown(f"##### **{rank}. {name}**")

                    is_movie = movies["movie_id"] == movie_id
                    genre = movies.loc[is_movie, "genre"].to_list()[0]
                    synopsis = movies.loc[is_movie, "description"].to_list()[0]
                    st.markdown(synopsis)
                    annotated_text(["**Genre(s)**: ", _genre_tags(genre)])

                    # Skip the rating lines instead of crashing when a movie
                    # has no row in the vote table.
                    movie_votes = vote[vote["id"] == movie_id]
                    if not movie_votes.empty:
                        st.markdown(f"""**Rating**: {movie_votes["vote_average"].iloc[0]}:star:""")
                        st.markdown(f"**Votes**: {movie_votes['vote_count'].iloc[0]}")

        with tab2:
            # Count genres over every recommended movie (up to six can be
            # requested), not only the first five.
            recommended_genres = movies.loc[movies["movie_id"].isin(movie_ids), "genre"].to_list()
            list_recom_genres = [g for genres in recommended_genres for g in genres.split(", ")]
            df_recom_genres = pd.Series(list_recom_genres).value_counts().to_frame().reset_index(names="genre")
            df_recom_genres["proportion (%)"] = (100*df_recom_genres["count"]/df_recom_genres["count"].sum())
            fig = px.bar(df_recom_genres, x='count', y='genre', color="genre", title='Most recommended genres', orientation="h")
            st.plotly_chart(fig, use_container_width=True)
#####################################################################################################
# HOTEL RECOMMENDATION SYSTEM #
#####################################################################################################
# Load scaler with caching
if select_usecase == "Hotel recommendation system 🛎️":

    @st.cache_data(ttl=3600)
    def get_scaler(df):
        """Fit a MinMaxScaler on the ``Rating`` and ``Price`` columns of ``df``.

        The result is cached by Streamlit for one hour (``ttl=3600``).
        """
        scaler = MinMaxScaler()
        scaler.fit(df[['Rating', 'Price']])
        return scaler

    def recommend_hotels_with_location_and_beds(df, preferences, max_recommendations=5):
        """Filter hotels by the user's preferences and rank the remaining ones.

        Uses the ``scaler`` defined later in this section to normalise ratings
        and prices. ``preferences`` is only read, never modified.

        Args:
            df: Hotel table with ``City``, ``Country``, ``Number of bed``,
                ``Rating`` and ``Price`` columns.
            preferences: Dict with optional keys ``'Location'`` (substring of a
                city or country), ``'Number of beds'`` (exact match),
                ``'Rating'`` and ``'Price'`` (each a ``(min, max)`` pair).
            max_recommendations: Maximum number of hotels to return.

        Returns:
            ``(hotels, None)`` with the best matches first, or
            ``(empty DataFrame, message)`` when no hotel passes the filters.
        """
        filtered_df = df

        # Filter by Location if specified (either city or country).
        location = preferences.get('Location')
        if location:
            # regex=False: the location is plain text, not a pattern.
            in_city = filtered_df['City'].str.contains(location, case=False, na=False, regex=False)
            in_country = filtered_df['Country'].str.contains(location, case=False, na=False, regex=False)
            filtered_df = filtered_df[in_city | in_country]

        # Filter by Number of beds if specified
        if 'Number of beds' in preferences:
            filtered_df = filtered_df[filtered_df['Number of bed'] == preferences['Number of beds']]

        # Filter by Rating if specified
        if 'Rating' in preferences:
            min_rating, max_rating = preferences['Rating']
            filtered_df = filtered_df[filtered_df['Rating'].between(min_rating, max_rating)]

        # Filter by Price range if specified
        if 'Price' in preferences:
            min_price, max_price = preferences['Price']
            filtered_df = filtered_df[filtered_df['Price'].between(min_price, max_price)]

        # Ensure there are still hotels after filtering
        if filtered_df.empty:
            message = "No hotels were found matching the specified criteria."
            send_notification(message)
            return pd.DataFrame(), message

        # The target point is the midpoint of each requested range. It is kept
        # in local variables so the caller's dict is not overwritten.
        target_rating = float(np.mean(preferences['Rating'])) if 'Rating' in preferences else 0
        target_price = float(np.mean(preferences['Price'])) if 'Price' in preferences else 0

        # Same column names as at fit time, so the scaler sees matching features.
        preferences_vector = pd.DataFrame([[target_rating, target_price]], columns=['Rating', 'Price'])
        preferences_vector_normalized = scaler.transform(preferences_vector)

        # Similarity between the target point and every remaining hotel.
        filtered_numerical_features_normalized = scaler.transform(filtered_df[['Rating', 'Price']])
        similarity_scores = cosine_similarity(preferences_vector_normalized,
                                              filtered_numerical_features_normalized)[0]

        # Positions of the highest scores, best first.
        top_indices = similarity_scores.argsort()[-max_recommendations:][::-1]
        return filtered_df.iloc[top_indices], None

    def send_notification(message):
        """
        Placeholder function to send a notification.
        This function can be replaced with the actual notification mechanism (e.g., email, SMS).
        """
        print("Notification:", message)

    def country_info(country):
        """Render the banner, description and optional top places of ``country``.

        Countries without a curated guide only get a plain header.
        """
        if country == "Thailand":
            image = "images/thailand.jpeg"
            emoji = "🏝️"
            description = """**Description**:
Thailand seamlessly fuses ancient traditions with modern dynamism, creating an unparalleled tapestry for travelers.
Renowned for its warm hospitality, vibrant culture, and delectable cuisine, Thailand offers an unforgettable experience for every adventurer."""
            top_places = """
- **Bangkok**: Immerse yourself in the hustle and bustle of Bangkok's streets, adorned with glittering temples and bustling markets. The Grand Palace and Khao San Road showcase the city's unique blend of tradition and modernity.
- **Chiang Mai**: Nestled in the misty mountains of Northern Thailand, Chiang Mai captivates with ancient temples, lush landscapes, and vibrant night markets. The Old City exudes a unique atmosphere, while the surrounding hills offer tranquility.
- **Phuket**: Thailand's largest island, Phuket, beckons beach lovers with its stunning white sands, vibrant nightlife, and water activities. It's a perfect blend of relaxation and excitement."""
        elif country == "France":
            image = "images/france.jpeg"
            emoji = "⚜️"
            description ="""**Description**:
Indulge in the countries rich tapestry of art, culture, and gastronomy.
From the romantic allure of Paris to the sun-kissed vineyards of Provence, every corner of this diverse country tells a unique story, promising an unforgettable journey for every traveler."""
            top_places = """
- **Paris**: Dive into the city's iconic landmarks such as the Eiffel Tower, Louvre Museum, and Notre-Dame Cathedral grace the skyline.
- **Provence**: Visit the stunning Palais des Papes in Avignon, explore the colorful markets of Aix-en-Provence, and unwind in the serene beauty of the Luberon region.
- **Côte d'Azur**: This stunning stretch of the French coastline is a captivating blend of azure waters, picturesque landscapes and charming villages.
"""
        elif country == "Spain":
            image = "images/spain-banner.jpg"
            emoji = "☀️"
            description = """**Description**:
Embark on an unforgettable journey where tradition and modernity coexist in harmony.
From the lively streets of Barcelona to the sun-soaked beaches of Andalusia, Spain offers a captivating blend of history, culture, and natural beauty.
"""
            top_places = """
- **Barcelona**: Explore the iconic Sagrada Familia, stroll down the vibrant La Rambla, and soak in the Mediterranean ambiance at Barceloneta Beach.
- **Seville**: Visit the awe-inspiring Alcázar, marvel at the Giralda Tower, and wander through the enchanting alleys of the Santa Cruz neighborhood.
- **Granada**: Explore the Generalife Gardens, stroll through the Albayzín quarter with its narrow streets and white houses, and savor the views of the city from the Mirador de San Nicolás.
"""
        elif country == "Singapore":
            image = "images/singapore.jpg"
            emoji = "🏙️"
            description = """**Description**:
From gleaming skyscrapers to vibrant neighborhoods, this cosmopolitan gem in Southeast Asia promises an immersive journey into a world where tradition meets cutting-edge technology."""
            top_places = """
- **Marina Bay Sands**: Enjoy panoramic views from the SkyPark, take a dip in the infinity pool, and explore The Shoppes for luxury shopping and entertainment. At night, witness the mesmerizing light and water show at the Marina Bay Sands Skypark.
- **Gardens by the Bay**: Explore the Flower Dome and Cloud Forest conservatories, and stroll through the scenic OCBC Skyway for breathtaking views of the gardens and city.
- **Sentosa Island**: Escape to Sentosa Island, a resort destination offering a myriad of attractions. Relax on pristine beaches, visit Universal Studios Singapore for thrilling rides, and explore S.E.A. Aquarium for an underwater adventure.
"""
        else:
            # No guide for this country: show the name only rather than
            # failing on variables that were never assigned.
            st.header(country, divider="grey")
            return

        ###### STREAMLIT MARKDOWN ######
        st.header(f"{country} {emoji}", divider="grey")
        st.image(image)
        st.markdown(description)
        # The widget key is the country name itself (a string, not a set).
        see_top_places = st.checkbox("**Top places to visit**", key=country)
        if see_top_places:
            st.markdown(top_places)

    st.markdown("""## Hotel Recommendation System 🛎️""")
    st.info("""This use case shows how you can create personalized hotel recommendations using a recommendation system with **content-based Filtering**.
Analyzing location, amenities, price, and reviews, the model suggests tailored hotel recommendation based on the user's preference.
""")
    st.markdown(" ")

    path_hotels_data = r"data/hotels"

    # Load hotel data
    df = load_data_csv(path_hotels_data, "booking_df.csv")

    # Clean data
    df = df.drop_duplicates()
    df["Country"] = df["Country"].replace("Espagne", "Spain")

    # Keep cities with at least five listings and rooms with at most six beds.
    # .copy() makes the filtered table independent, so the column assignments
    # below write to it reliably.
    city_counts = df["City"].value_counts()
    list_cities = city_counts[city_counts >= 5].index
    df = df.loc[df["City"].isin(list_cities) & (df["Number of bed"] <= 6)].copy()
    df["Price"] = df["Price"].astype(int)

    # A bed count of 0 is replaced by a value derived from the price band.
    no_bed_info = df["Number of bed"] == 0
    df.loc[no_bed_info & (df["Price"] < 1000), "Number of bed"] = 1
    df.loc[no_bed_info & df["Price"].between(1000, 2000), "Number of bed"] = 2
    df.loc[no_bed_info & (df["Price"] > 2000), "Number of bed"] = 3

    # A rating of 0 is treated as missing and replaced by the rounded mean.
    df["Rating"] = df["Rating"].replace(0, np.nan)
    df["Rating"] = df["Rating"].fillna(np.round(df["Rating"].mean(), 1))

    scaler = get_scaler(df)

    col1, col2 = st.columns([0.3, 0.7], gap="large")
    with col1:
        # Collect user preferences
        st.markdown(" ")
        st.markdown(" ")
        st.markdown("")

        list_countries = df["Country"].unique()
        location = st.selectbox("Select a Country", list_countries, index=0)

        list_nb_beds = df["Number of bed"].unique()
        num_beds = st.selectbox("Number of beds", list_nb_beds, index=0)

        # Slider defaults are clamped to the data bounds so they are always
        # valid, whatever the range of the loaded data.
        rating_lo, rating_hi = float(df["Rating"].min()), float(df["Rating"].max())
        default_rating_lo = min(max(5.0, rating_lo), rating_hi)
        min_rating, max_rating = st.slider("Range of ratings", min_value=rating_lo, max_value=rating_hi,
                                           step=0.1, value=(default_rating_lo, rating_hi))

        price_lo, price_hi = int(df["Price"].min()), int(df["Price"].max())
        default_price_hi = max(price_lo, min(10000, price_hi))
        min_price, max_price = st.slider("Range of room prices", min_value=price_lo, max_value=price_hi,
                                         step=10, value=(price_lo, default_price_hi))

        # Convert price range sliders to integer values
        min_price = int(min_price)
        max_price = int(max_price)

    with col2:
        country_info(location)

    preferences = {
        'Location': location,
        'Number of beds': num_beds,
        'Rating': [min_rating, max_rating],
        'Price': [min_price, max_price],
    }

    if st.button("Recommend Hotels"):
        st.info("Hotels were recommended based on how similar they were to the users preferences.")

        # Default number of recommendations to show
        max_recommendations = 5

        # One call is enough: the filters do not depend on max_recommendations,
        # so retrying with a smaller value cannot turn an empty result into a
        # non-empty one.
        recommended_hotels, message = recommend_hotels_with_location_and_beds(df, preferences, max_recommendations)

        if recommended_hotels.empty:
            st.error(message)
        else:
            st.markdown(" ")
            for rank, (_, hotel) in enumerate(recommended_hotels.iterrows(), start=1):
                col1_, col2_ = st.columns([0.4, 0.6], gap="medium")
                with col1_:
                    st.image("images/room.jpg", width=100)
                    st.markdown(f"### {rank}: {hotel['Hotel Name']}")
                    st.markdown(f"""**{hotel['Room Type']}** <br>
with {hotel['Bed Type']}
""", unsafe_allow_html=True)
                with col2_:
                    st.markdown(" ")
                    st.markdown(" ")
                    annotated_text("**Number of beds :** ", (f"{hotel['Number of bed']}", "", "#faa"))
                    annotated_text("**City:** ", (f"{hotel['City']}", "", "#afa"))
                    annotated_text("**Rating:** ", (f"{hotel['Rating']}", "", "#8ef"))
                    annotated_text("**Price:** ", (f"{hotel['Price']}$", "", "#fea"))
                st.divider()
|