xinah3131 committed on
Commit
cdd172b
1 Parent(s): e46e65a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +154 -9
app.py CHANGED
@@ -2,11 +2,15 @@ import streamlit as st
2
  import pandas as pd
3
  import joblib
4
  from preprocessText import preprocess
5
- from apiSearch import get_metadata
6
  import base64
7
  import requests
 
 
 
8
  # Load the model
9
- model = joblib.load('85pct.pkl')
 
10
 
11
  # Define the categories
12
  categories = {
@@ -35,7 +39,23 @@ def main():
35
  st.set_page_config(layout="wide")
36
  st.markdown(
37
  f"""
 
38
  <style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  body{{
40
  display: flex;
41
  justify-content: center;
@@ -61,6 +81,11 @@ def main():
61
  border: 2px solid #d72324;
62
  padding: 10px;
63
  }}
 
 
 
 
 
64
  </style>
65
  """,
66
  unsafe_allow_html=True
@@ -69,8 +94,10 @@ def main():
69
  st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
70
  #https://www.freepnglogos.com/uploads/youtube-play-red-logo-png-transparent-background-6.png
71
  # st.write("Enter the video details below:")
72
-
73
- tab1, tab2 = st.tabs(["Predict", "Test"])
 
 
74
  # Input fields
75
  with tab1:
76
  with st.container():
@@ -82,29 +109,29 @@ def main():
82
  if url:
83
  metadata = get_metadata(url)
84
  if not metadata.empty:
 
85
  getTitle = metadata['title'].iloc[0]
86
  getDuration = metadata['duration'].iloc[0]
87
  category_id = metadata['category_id'].iloc[0]
88
  getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
89
  getCategory = int(category_id)
90
-
91
  if getThumbnailUrl is not None:
92
  picture = get_picture_from_url(getThumbnailUrl)
93
  if picture:
94
- st.image(picture, caption='Uploaded Picture', use_column_width=True)
95
  with col2:
96
  title = st.text_input("Title", placeholder="Enter a video title",value=getTitle)
97
- duration = st.number_input("Duration (in minutes)", min_value=0.0, value=getDuration)
98
  category = st.selectbox("Category", list(categories.keys()), index=list(categories.values()).index(getCategory))
99
 
100
  with col3:
101
  picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
102
  if picture is not None:
103
- st.picture(picture, use_column_width=True)
104
  # Convert category to category ID
105
  categoryId = categories[category]
106
 
107
-
108
  if st.button("Predict"):
109
  # Perform prediction
110
  if title is None or title.strip() == "" and duration == 0:
@@ -127,7 +154,49 @@ def main():
127
  st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
128
 
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
 
 
 
131
  def get_picture_from_url(url):
132
  try:
133
  response = requests.get(url)
@@ -136,6 +205,82 @@ def get_picture_from_url(url):
136
  except:
137
  return None
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # Function to make predictions
140
  def predict_trend(title, duration, category_id):
141
  duration = str(duration)
 
2
  import pandas as pd
3
  import joblib
4
  from preprocessText import preprocess
5
+ from apiSearch import get_metadata,get_trending_videos
6
  import base64
7
  import requests
8
+ import matplotlib.pyplot as plt
9
+ import numpy as np
10
+ import seaborn as sns
11
  # Load the model
12
+
13
+ model = joblib.load('85pct(new).pkl')
14
 
15
  # Define the categories
16
  categories = {
 
39
  st.set_page_config(layout="wide")
40
  st.markdown(
41
  f"""
42
+
43
  <style>
44
+ @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
45
+ @import url('https://fonts.googleapis.com/css2?family=YouTube+Sans&display=swap');
46
+ html, body, [class*="css"] {{
47
+ font-family: 'Roboto', sans-serif;
48
+
49
+ }}
50
+ [data-testid="stAppViewContainer"] > .main {{
51
+ background-color : white;
52
+
53
+ }}
54
+ p{{
55
+ font-family: 'Roboto', sans-serif;
56
+ text-weight: bold;
57
+ font-size: 25px;
58
+ }}
59
  body{{
60
  display: flex;
61
  justify-content: center;
 
81
  border: 2px solid #d72324;
82
  padding: 10px;
83
  }}
84
+ .stButton > button:hover {{
85
+ background-color: white;
86
+ color:#d72324;
87
+ }}
88
+
89
  </style>
90
  """,
91
  unsafe_allow_html=True
 
94
  st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
95
  #https://www.freepnglogos.com/uploads/youtube-play-red-logo-png-transparent-background-6.png
96
  # st.write("Enter the video details below:")
97
+
98
+ # Define a boolean flag variable to track prediction status
99
+ prediction_done = False
100
+ tab1, tab2, tab3 = st.tabs(["Predict", "Trending","Visualize"])
101
  # Input fields
102
  with tab1:
103
  with st.container():
 
109
  if url:
110
  metadata = get_metadata(url)
111
  if not metadata.empty:
112
+
113
  getTitle = metadata['title'].iloc[0]
114
  getDuration = metadata['duration'].iloc[0]
115
  category_id = metadata['category_id'].iloc[0]
116
  getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
117
  getCategory = int(category_id)
118
+
119
  if getThumbnailUrl is not None:
120
  picture = get_picture_from_url(getThumbnailUrl)
121
  if picture:
122
+ st.image(picture, caption='Thumbnail captured',width = 400, channels="BGR")
123
  with col2:
124
  title = st.text_input("Title", placeholder="Enter a video title",value=getTitle)
125
+ duration = st.number_input("Duration (in seconds)", min_value=0.0, value=getDuration)
126
  category = st.selectbox("Category", list(categories.keys()), index=list(categories.values()).index(getCategory))
127
 
128
  with col3:
129
  picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
130
  if picture is not None:
131
+ st.picture(picture,caption='Thumbnail Uploaded',width = 400, channels="BGR")
132
  # Convert category to category ID
133
  categoryId = categories[category]
134
 
 
135
  if st.button("Predict"):
136
  # Perform prediction
137
  if title is None or title.strip() == "" and duration == 0:
 
154
  st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
155
 
156
 
157
+ with tab2:
158
+ country_code = st.selectbox("Select Country Code", ['US', 'CA', 'GB','DE', 'FR', 'RU', 'BR','IN','MY','SG','JP','KR'])
159
+ with st.container():
160
+ st.write("Top 10 Trending Video")
161
+ df = get_trending_videos(country_code)
162
+ st.dataframe(df)
163
+ if df is not None:
164
+ # Display video titles
165
+ selected_video_title = st.selectbox("Select a Video", df['title'])
166
+ selected_video = df[df['title'] == selected_video_title].iloc[0]
167
+
168
+ col4,col5 = st.columns(2)
169
+ with col4:
170
+ if selected_video is not None:
171
+ image = get_picture_from_url(selected_video['thumbnail_link'])
172
+ if image:
173
+ st.image(image, caption='Thumbnail captured',width = 400, channels="BGR")
174
+ with col5:
175
+ st.write("Title:", selected_video['title'])
176
+ category_name = next((key for key, value in categories.items() if value == selected_video['category_id']), 'Unknown Category')
177
+ st.write("Category:", category_name)
178
+ st.write("Duration:", selected_video['duration'])
179
+ else:
180
+ st.error('Failed to retrieve trending videos.')
181
+
182
+ with tab3:
183
+ with st.container():
184
+ col6,col7 = st.columns(2)
185
+
186
+ with col6:
187
+ show_top_category()
188
+
189
+ with col7:
190
+ show_top_duration()
191
+
192
+ with st.container():
193
+ col8,col9 = st.columns(2)
194
+ with col8:
195
+ show_top_title()
196
 
197
+ with col9:
198
+ show_top_titleLength()
199
+
200
  def get_picture_from_url(url):
201
  try:
202
  response = requests.get(url)
 
205
  except:
206
  return None
207
 
208
def show_top_category():
    """Render a bar chart of predicted probability per category in Streamlit.

    Reads ``topCategory.csv`` (the ``category_id`` and ``predicted_prob``
    columns are used), ranks the rows by ascending ``predicted_prob`` and
    draws one bar per rank, coloured by the category name looked up in the
    module-level ``categories`` mapping (name -> id). Ids that are not in
    the mapping are labelled ``'Unknown Category'``.
    """
    top_category = pd.read_csv('topCategory.csv')

    # Ascending sort: rank 1 is the row with the lowest predicted_prob.
    ranked = top_category.sort_values('predicted_prob')
    ranked['rank'] = range(1, len(ranked) + 1)

    # Invert the name -> id mapping once instead of scanning it per row.
    # setdefault keeps the first name seen for a duplicated id.
    id_to_name = {}
    for cat_name, cat_id in categories.items():
        id_to_name.setdefault(cat_id, cat_name)
    ranked['category_name'] = ranked['category_id'].map(
        lambda cat_id: id_to_name.get(cat_id, 'Unknown Category')
    )

    # One colour per distinct category id.
    color_palette = sns.color_palette('Set2', len(ranked['category_id'].unique()))

    # Draw on an explicit Axes so the plot does not depend on pyplot's
    # global "current figure" state.
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.barplot(
        data=ranked,
        x='rank',
        y='predicted_prob',
        hue='category_name',
        palette=color_palette,
        ax=ax,
    )
    ax.set_xlabel('Rank')
    ax.set_ylabel('Predicted Probability')
    ax.set_title('Top Categories')

    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until closed; without this
    # each Streamlit rerun would leak one figure.
    plt.close(fig)
230
+
231
def show_top_duration():
    """Render a scatter plot of duration vs. predicted probability in Streamlit.

    Reads ``topDuration.csv`` and plots its ``duration`` column on the x-axis
    against its ``predicted_prob`` column on the y-axis.
    """
    top_duration = pd.read_csv('topDuration.csv')
    top_duration_sorted = top_duration.sort_values('predicted_prob', ascending=False)

    # Use an explicit Figure/Axes instead of handing the pyplot module to
    # Streamlit, so rendering does not rely on global pyplot state.
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.scatter(top_duration_sorted['duration'], top_duration_sorted['predicted_prob'])
    ax.set_xlabel('Duration')
    ax.set_ylabel('Predicted Probability')
    ax.set_title('Top Durations')

    st.pyplot(fig)
    # Release the figure; pyplot otherwise retains it across reruns.
    plt.close(fig)
248
+
249
def show_top_title():
    """Render a horizontal bar chart of title-feature importances in Streamlit.

    Reads ``topTitle.csv`` and plots its ``Feature`` column against its
    ``Importance Score`` column.
    """
    top_title = pd.read_csv('topTitle.csv')
    # Ascending sort on the importance score; barh draws the first row at
    # the bottom, so the highest score ends up at the top of the chart.
    top_title_sorted = top_title.sort_values('Importance Score')

    # Keep the Figure/Axes returned by subplots and draw on them explicitly
    # rather than relying on pyplot's implicit current figure.
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.barh(top_title_sorted['Feature'], top_title_sorted['Importance Score'])
    ax.set_xlabel('Importance Score')
    ax.set_ylabel('Feature')
    ax.set_title('Top Title Features')

    st.pyplot(fig)
    # Release the figure; pyplot otherwise retains it across reruns.
    plt.close(fig)
260
+
261
+
262
def round_interval(interval_str):
    """Return an interval string with both bounds rounded to whole numbers.

    Args:
        interval_str: Text of the form ``"(a, b]"``; any mix of round and
            square brackets is accepted, as is surrounding whitespace.

    Returns:
        A string ``"(A, B)"`` where ``A`` and ``B`` are the bounds rounded to
        the nearest integer.

    Raises:
        ValueError: If the text does not contain exactly two comma-separated
            numbers.
    """
    # Strip whitespace first so the bracket strip actually reaches the brackets.
    bounds = interval_str.strip().strip('()[]').split(',')
    start, end = map(float, bounds)
    # round() rather than int(): truncation would label a bin such as
    # (9.999, 20.0] as "(9, 20)".
    return f"({round(start)}, {round(end)})"
265
+
266
def show_top_titleLength():
    """Render a bar chart of title-length range vs. predicted probability.

    Reads ``topTitleLength.csv``; each ``titleLength`` value is passed
    through ``round_interval`` to build the x-axis label, and the matching
    ``predicted_prob`` value gives the bar height.
    """
    top_title_length = pd.read_csv('topTitleLength.csv')

    predicted_probs = top_title_length['predicted_prob']
    rounded_ranges = [round_interval(range_val) for range_val in top_title_length['titleLength']]

    # NOTE: sns.set changes seaborn/matplotlib defaults globally, so it also
    # affects figures created after this call.
    sns.set(style='whitegrid')

    # Draw on an explicit Figure/Axes and hand that Figure to Streamlit.
    # plt.show() is not called: the figure is displayed by st.pyplot.
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=rounded_ranges, y=predicted_probs, ax=ax)
    ax.set_xlabel('Title Length Range')
    ax.set_ylabel('Predicted Probability')
    ax.set_title('Top 5 Ranges for Title Length vs. Predicted Probability')
    ax.tick_params(axis='x', labelrotation=45)

    st.pyplot(fig)
    # Release the figure; pyplot otherwise retains it across reruns.
    plt.close(fig)
283
+
284
  # Function to make predictions
285
  def predict_trend(title, duration, category_id):
286
  duration = str(duration)