Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,11 +2,15 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import joblib
|
4 |
from preprocessText import preprocess
|
5 |
-
from apiSearch import get_metadata
|
6 |
import base64
|
7 |
import requests
|
|
|
|
|
|
|
8 |
# Load the model
|
9 |
-
|
|
|
10 |
|
11 |
# Define the categories
|
12 |
categories = {
|
@@ -35,7 +39,23 @@ def main():
|
|
35 |
st.set_page_config(layout="wide")
|
36 |
st.markdown(
|
37 |
f"""
|
|
|
38 |
<style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
body{{
|
40 |
display: flex;
|
41 |
justify-content: center;
|
@@ -61,6 +81,11 @@ def main():
|
|
61 |
border: 2px solid #d72324;
|
62 |
padding: 10px;
|
63 |
}}
|
|
|
|
|
|
|
|
|
|
|
64 |
</style>
|
65 |
""",
|
66 |
unsafe_allow_html=True
|
@@ -69,8 +94,10 @@ def main():
|
|
69 |
st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
|
70 |
#https://www.freepnglogos.com/uploads/youtube-play-red-logo-png-transparent-background-6.png
|
71 |
# st.write("Enter the video details below:")
|
72 |
-
|
73 |
-
|
|
|
|
|
74 |
# Input fields
|
75 |
with tab1:
|
76 |
with st.container():
|
@@ -82,29 +109,29 @@ def main():
|
|
82 |
if url:
|
83 |
metadata = get_metadata(url)
|
84 |
if not metadata.empty:
|
|
|
85 |
getTitle = metadata['title'].iloc[0]
|
86 |
getDuration = metadata['duration'].iloc[0]
|
87 |
category_id = metadata['category_id'].iloc[0]
|
88 |
getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
|
89 |
getCategory = int(category_id)
|
90 |
-
|
91 |
if getThumbnailUrl is not None:
|
92 |
picture = get_picture_from_url(getThumbnailUrl)
|
93 |
if picture:
|
94 |
-
st.image(picture, caption='
|
95 |
with col2:
|
96 |
title = st.text_input("Title", placeholder="Enter a video title",value=getTitle)
|
97 |
-
duration = st.number_input("Duration (in
|
98 |
category = st.selectbox("Category", list(categories.keys()), index=list(categories.values()).index(getCategory))
|
99 |
|
100 |
with col3:
|
101 |
picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
|
102 |
if picture is not None:
|
103 |
-
st.picture(picture,
|
104 |
# Convert category to category ID
|
105 |
categoryId = categories[category]
|
106 |
|
107 |
-
|
108 |
if st.button("Predict"):
|
109 |
# Perform prediction
|
110 |
if title is None or title.strip() == "" and duration == 0:
|
@@ -127,7 +154,49 @@ def main():
|
|
127 |
st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
|
128 |
|
129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
|
|
|
|
|
|
|
131 |
def get_picture_from_url(url):
|
132 |
try:
|
133 |
response = requests.get(url)
|
@@ -136,6 +205,82 @@ def get_picture_from_url(url):
|
|
136 |
except:
|
137 |
return None
|
138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
# Function to make predictions
|
140 |
def predict_trend(title, duration, category_id):
|
141 |
duration = str(duration)
|
|
|
2 |
import pandas as pd
|
3 |
import joblib
|
4 |
from preprocessText import preprocess
|
5 |
+
from apiSearch import get_metadata,get_trending_videos
|
6 |
import base64
|
7 |
import requests
|
8 |
+
import matplotlib.pyplot as plt
|
9 |
+
import numpy as np
|
10 |
+
import seaborn as sns
|
11 |
# Load the model
|
12 |
+
|
13 |
+
model = joblib.load('85pct(new).pkl')
|
14 |
|
15 |
# Define the categories
|
16 |
categories = {
|
|
|
39 |
st.set_page_config(layout="wide")
|
40 |
st.markdown(
|
41 |
f"""
|
42 |
+
|
43 |
<style>
|
44 |
+
@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
|
45 |
+
@import url('https://fonts.googleapis.com/css2?family=YouTube+Sans&display=swap');
|
46 |
+
html, body, [class*="css"] {{
|
47 |
+
font-family: 'Roboto', sans-serif;
|
48 |
+
|
49 |
+
}}
|
50 |
+
[data-testid="stAppViewContainer"] > .main {{
|
51 |
+
background-color : white;
|
52 |
+
|
53 |
+
}}
|
54 |
+
p{{
|
55 |
+
font-family: 'Roboto', sans-serif;
|
56 |
+
text-weight: bold;
|
57 |
+
font-size: 25px;
|
58 |
+
}}
|
59 |
body{{
|
60 |
display: flex;
|
61 |
justify-content: center;
|
|
|
81 |
border: 2px solid #d72324;
|
82 |
padding: 10px;
|
83 |
}}
|
84 |
+
.stButton > button:hover {{
|
85 |
+
background-color: white;
|
86 |
+
color:#d72324;
|
87 |
+
}}
|
88 |
+
|
89 |
</style>
|
90 |
""",
|
91 |
unsafe_allow_html=True
|
|
|
94 |
st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
|
95 |
#https://www.freepnglogos.com/uploads/youtube-play-red-logo-png-transparent-background-6.png
|
96 |
# st.write("Enter the video details below:")
|
97 |
+
|
98 |
+
# Define a boolean flag variable to track prediction status
|
99 |
+
prediction_done = False
|
100 |
+
tab1, tab2, tab3 = st.tabs(["Predict", "Trending","Visualize"])
|
101 |
# Input fields
|
102 |
with tab1:
|
103 |
with st.container():
|
|
|
109 |
if url:
|
110 |
metadata = get_metadata(url)
|
111 |
if not metadata.empty:
|
112 |
+
|
113 |
getTitle = metadata['title'].iloc[0]
|
114 |
getDuration = metadata['duration'].iloc[0]
|
115 |
category_id = metadata['category_id'].iloc[0]
|
116 |
getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
|
117 |
getCategory = int(category_id)
|
118 |
+
|
119 |
if getThumbnailUrl is not None:
|
120 |
picture = get_picture_from_url(getThumbnailUrl)
|
121 |
if picture:
|
122 |
+
st.image(picture, caption='Thumbnail captured',width = 400, channels="BGR")
|
123 |
with col2:
|
124 |
title = st.text_input("Title", placeholder="Enter a video title",value=getTitle)
|
125 |
+
duration = st.number_input("Duration (in seconds)", min_value=0.0, value=getDuration)
|
126 |
category = st.selectbox("Category", list(categories.keys()), index=list(categories.values()).index(getCategory))
|
127 |
|
128 |
with col3:
|
129 |
picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
|
130 |
if picture is not None:
|
131 |
+
st.picture(picture,caption='Thumbnail Uploaded',width = 400, channels="BGR")
|
132 |
# Convert category to category ID
|
133 |
categoryId = categories[category]
|
134 |
|
|
|
135 |
if st.button("Predict"):
|
136 |
# Perform prediction
|
137 |
if title is None or title.strip() == "" and duration == 0:
|
|
|
154 |
st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
|
155 |
|
156 |
|
157 |
+
with tab2:
|
158 |
+
country_code = st.selectbox("Select Country Code", ['US', 'CA', 'GB','DE', 'FR', 'RU', 'BR','IN','MY','SG','JP','KR'])
|
159 |
+
with st.container():
|
160 |
+
st.write("Top 10 Trending Video")
|
161 |
+
df = get_trending_videos(country_code)
|
162 |
+
st.dataframe(df)
|
163 |
+
if df is not None:
|
164 |
+
# Display video titles
|
165 |
+
selected_video_title = st.selectbox("Select a Video", df['title'])
|
166 |
+
selected_video = df[df['title'] == selected_video_title].iloc[0]
|
167 |
+
|
168 |
+
col4,col5 = st.columns(2)
|
169 |
+
with col4:
|
170 |
+
if selected_video is not None:
|
171 |
+
image = get_picture_from_url(selected_video['thumbnail_link'])
|
172 |
+
if image:
|
173 |
+
st.image(image, caption='Thumbnail captured',width = 400, channels="BGR")
|
174 |
+
with col5:
|
175 |
+
st.write("Title:", selected_video['title'])
|
176 |
+
category_name = next((key for key, value in categories.items() if value == selected_video['category_id']), 'Unknown Category')
|
177 |
+
st.write("Category:", category_name)
|
178 |
+
st.write("Duration:", selected_video['duration'])
|
179 |
+
else:
|
180 |
+
st.error('Failed to retrieve trending videos.')
|
181 |
+
|
182 |
+
with tab3:
|
183 |
+
with st.container():
|
184 |
+
col6,col7 = st.columns(2)
|
185 |
+
|
186 |
+
with col6:
|
187 |
+
show_top_category()
|
188 |
+
|
189 |
+
with col7:
|
190 |
+
show_top_duration()
|
191 |
+
|
192 |
+
with st.container():
|
193 |
+
col8,col9 = st.columns(2)
|
194 |
+
with col8:
|
195 |
+
show_top_title()
|
196 |
|
197 |
+
with col9:
|
198 |
+
show_top_titleLength()
|
199 |
+
|
200 |
def get_picture_from_url(url):
|
201 |
try:
|
202 |
response = requests.get(url)
|
|
|
205 |
except:
|
206 |
return None
|
207 |
|
208 |
+
def show_top_category():
    """Render a bar chart of predicted probability per category in Streamlit.

    Reads ``topCategory.csv`` (the ``category_id`` and ``predicted_prob``
    columns are used), ranks the rows by ascending ``predicted_prob`` and
    draws one bar per rank, coloured by the category name looked up in the
    module-level ``categories`` mapping (name -> id).
    """
    topCategory = pd.read_csv('topCategory.csv')

    # Rank 1 is the row with the lowest predicted_prob (ascending sort).
    topCategory_sorted = topCategory.sort_values('predicted_prob')
    topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)

    # Invert the name -> id mapping once instead of rescanning it per row.
    # setdefault keeps the first name seen for an id, matching a linear scan.
    id_to_name = {}
    for name, category_id in categories.items():
        id_to_name.setdefault(category_id, name)
    topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(
        lambda category_id: id_to_name.get(category_id, 'Unknown Category')
    )

    # One colour per hue level actually plotted (several unknown ids collapse
    # into a single 'Unknown Category' level).
    color_palette = sns.color_palette(
        'Set2', topCategory_sorted['category_name'].nunique()
    )

    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        sns.barplot(
            data=topCategory_sorted,
            x='rank',
            y='predicted_prob',
            hue='category_name',
            palette=color_palette,
            ax=ax,
        )
        ax.set_xlabel('Rank')
        ax.set_ylabel('Predicted Probability')
        ax.set_title('Top Categories')

        # Display the legend and the plot in Streamlit
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until closed; Streamlit reruns this
        # function on each interaction, so close it to avoid accumulation.
        plt.close(fig)
|
230 |
+
|
231 |
+
def show_top_duration():
    """Render a scatter plot of duration vs. predicted probability in Streamlit.

    Reads ``topDuration.csv`` and plots its ``duration`` column on the x-axis
    against its ``predicted_prob`` column on the y-axis.
    """
    topDuration = pd.read_csv('topDuration.csv')
    topDuration_sorted = topDuration.sort_values('predicted_prob', ascending=False)

    fig, ax = plt.subplots(figsize=(8, 5))  # (width, height) in inches
    try:
        ax.scatter(topDuration_sorted['duration'], topDuration_sorted['predicted_prob'])
        ax.set_xlabel('Duration')
        ax.set_ylabel('Predicted Probability')
        ax.set_title('Top Durations')

        # Pass the Figure explicitly rather than the pyplot module.
        st.pyplot(fig)
    finally:
        # Streamlit reruns the script on every interaction; close the figure
        # so pyplot does not keep accumulating open figures.
        plt.close(fig)
|
248 |
+
|
249 |
+
def show_top_title():
    """Render a horizontal bar chart of title feature importances in Streamlit.

    Reads ``topTitle.csv`` and plots its ``Feature`` column against its
    ``Importance Score`` column, sorted by ascending ``Importance Score``.
    """
    topTitle = pd.read_csv('topTitle.csv')
    # Sort the DataFrame in ascending order based on the Importance Score column
    topTitle_sorted = topTitle.sort_values('Importance Score')

    fig, ax = plt.subplots(figsize=(5, 5))
    try:
        ax.barh(topTitle_sorted['Feature'], topTitle_sorted['Importance Score'])
        ax.set_xlabel('Importance Score')
        ax.set_ylabel('Feature')
        ax.set_title('Top Title Features')

        # Pass the Figure explicitly rather than the pyplot module.
        st.pyplot(fig)
    finally:
        # Streamlit reruns the script on every interaction; close the figure
        # so pyplot does not keep accumulating open figures.
        plt.close(fig)
|
260 |
+
|
261 |
+
|
262 |
+
def round_interval(interval_str):
    """Convert an interval string such as ``"(10.5, 20.3]"`` to ``"(10, 20)"``.

    Surrounding whitespace and any mix of ``(``, ``)``, ``[`` and ``]`` is
    stripped, the two comma-separated bounds are parsed as floats and each is
    truncated toward zero with ``int`` (not rounded to nearest).

    Raises:
        ValueError: if the string does not hold exactly two numeric bounds.
    """
    # Strip whitespace first so padded values (e.g. from a CSV cell) still
    # have their brackets removed.
    bounds = interval_str.strip().strip('()[]').split(',')
    start, end = (float(bound) for bound in bounds)
    return f"({int(start)}, {int(end)})"
|
265 |
+
|
266 |
+
def show_top_titleLength():
    """Render a bar chart of predicted probability per title-length range.

    Reads ``topTitleLength.csv``; each ``titleLength`` value is turned into an
    integer-bound label with ``round_interval`` and plotted against the
    corresponding ``predicted_prob`` value.
    """
    topTitleLength = pd.read_csv('topTitleLength.csv')

    title_length_ranges = topTitleLength['titleLength']
    predicted_probs = topTitleLength['predicted_prob']
    rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]

    # Set the style of the plot (note: this changes seaborn's global theme).
    sns.set(style='whitegrid')

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        sns.barplot(x=rounded_ranges, y=predicted_probs, ax=ax)
        ax.set_xlabel('Title Length Range')
        ax.set_ylabel('Predicted Probability')
        ax.set_title('Top 5 Ranges for Title Length vs. Predicted Probability')
        ax.tick_params(axis='x', rotation=45)

        # No plt.show(): Streamlit renders the figure itself, and show() would
        # try to open a GUI window on the server.
        st.pyplot(fig)
    finally:
        # Streamlit reruns the script on every interaction; close the figure
        # so pyplot does not keep accumulating open figures.
        plt.close(fig)
|
283 |
+
|
284 |
# Function to make predictions
|
285 |
def predict_trend(title, duration, category_id):
|
286 |
duration = str(duration)
|