Spaces:

xinah3131
/

youtube-trend-prediction

Sleeping

App Files Files Community

xinah3131 committed on Jun 6, 2023

Commit

cdd172b

•

1 Parent(s): e46e65a

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -9

app.py CHANGED Viewed

@@ -2,11 +2,15 @@ import streamlit as st
 import pandas as pd
 import joblib
 from preprocessText import preprocess
-from apiSearch import get_metadata
 import base64
 import requests
 # Load the model
-model = joblib.load('85pct.pkl')
 # Define the categories
 categories = {
@@ -35,7 +39,23 @@ def main():
     st.set_page_config(layout="wide")
     st.markdown(
         f"""
         <style>
             body{{
                 display: flex;
                 justify-content: center;
@@ -61,6 +81,11 @@ def main():
                 border: 2px solid #d72324;
                 padding: 10px;
             }}
         </style>
         """,
         unsafe_allow_html=True
@@ -69,8 +94,10 @@ def main():
     st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
     #https://www.freepnglogos.com/uploads/youtube-play-red-logo-png-transparent-background-6.png
     # st.write("Enter the video details below:")
-    tab1, tab2 = st.tabs(["Predict", "Test"])
     # Input fields
     with tab1:
         with st.container():
@@ -82,29 +109,29 @@ def main():
                 if url:
                     metadata = get_metadata(url)
                     if not metadata.empty:
                         getTitle = metadata['title'].iloc[0]
                         getDuration = metadata['duration'].iloc[0]
                         category_id = metadata['category_id'].iloc[0]
                         getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
                         getCategory = int(category_id)
                         if getThumbnailUrl is not None:
                             picture = get_picture_from_url(getThumbnailUrl)
                             if picture:
-                                st.image(picture, caption='Uploaded Picture', use_column_width=True)
             with col2:
                 title = st.text_input("Title", placeholder="Enter a video title",value=getTitle)
-                duration = st.number_input("Duration (in minutes)", min_value=0.0, value=getDuration)
                 category = st.selectbox("Category", list(categories.keys()), index=list(categories.values()).index(getCategory))
             with col3:
                 picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
                 if picture is not None:
-                    st.picture(picture, use_column_width=True)
     # Convert category to category ID
         categoryId = categories[category]
         if st.button("Predict"):
             # Perform prediction
             if title is None or title.strip() == "" and duration == 0:
@@ -127,7 +154,49 @@ def main():
                         st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
 def get_picture_from_url(url):
     try:
         response = requests.get(url)
@@ -136,6 +205,82 @@ def get_picture_from_url(url):
     except:
         return None
 # Function to make predictions
 def predict_trend(title, duration, category_id):
     duration = str(duration)

 import pandas as pd
 import joblib
 from preprocessText import preprocess
+from apiSearch import get_metadata,get_trending_videos
 import base64
 import requests
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
 # Load the model
+model = joblib.load('85pct(new).pkl')
 # Define the categories
 categories = {
     st.set_page_config(layout="wide")
     st.markdown(
         f"""
         <style>
+            @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@400;700&display=swap');
+            @import url('https://fonts.googleapis.com/css2?family=YouTube+Sans&display=swap');
+            html, body, [class*="css"]  {{
+			    font-family: 'Roboto', sans-serif;
+			}}
+            [data-testid="stAppViewContainer"] > .main {{
+                background-color : white;
+            }}
+            p{{
+                font-family: 'Roboto', sans-serif;
+                text-weight: bold;
+                font-size: 25px;
+            }}
             body{{
                 display: flex;
                 justify-content: center;
                 border: 2px solid #d72324;
                 padding: 10px;
             }}
+            .stButton > button:hover {{
+                background-color: white;
+                color:#d72324;
+            }}
         </style>
         """,
         unsafe_allow_html=True
     st.markdown("<h1>YouTube Trend Prediction</h1>", unsafe_allow_html=True)
     #https://www.freepnglogos.com/uploads/youtube-play-red-logo-png-transparent-background-6.png
     # st.write("Enter the video details below:")
+    # Define a boolean flag variable to track prediction status
+    prediction_done = False
+    tab1, tab2, tab3 = st.tabs(["Predict", "Trending","Visualize"])
     # Input fields
     with tab1:
         with st.container():
                 if url:
                     metadata = get_metadata(url)
                     if not metadata.empty:
                         getTitle = metadata['title'].iloc[0]
                         getDuration = metadata['duration'].iloc[0]
                         category_id = metadata['category_id'].iloc[0]
                         getThumbnailUrl = metadata['thumbnail_link'].iloc[0]
                         getCategory = int(category_id)
                         if getThumbnailUrl is not None:
                             picture = get_picture_from_url(getThumbnailUrl)
                             if picture:
+                                st.image(picture, caption='Thumbnail captured',width = 400, channels="BGR")
             with col2:
                 title = st.text_input("Title", placeholder="Enter a video title",value=getTitle)
+                duration = st.number_input("Duration (in seconds)", min_value=0.0, value=getDuration)
                 category = st.selectbox("Category", list(categories.keys()), index=list(categories.values()).index(getCategory))
             with col3:
                 picture = st.file_uploader("Upload Picture", type=["jpg", "jpeg", "png"])
                 if picture is not None:
+                    st.picture(picture,caption='Thumbnail Uploaded',width = 400, channels="BGR")
     # Convert category to category ID
         categoryId = categories[category]
         if st.button("Predict"):
             # Perform prediction
             if title is None or title.strip() == "" and duration == 0:
                         st.markdown("![Alt Text](https://media.tenor.com/VYKtkKnHaUcAAAAj/quby-cute.gif)")
+    with tab2:
+        country_code = st.selectbox("Select Country Code", ['US', 'CA', 'GB','DE', 'FR', 'RU', 'BR','IN','MY','SG','JP','KR'])
+        with st.container():
+            st.write("Top 10 Trending Video")
+            df = get_trending_videos(country_code)
+            st.dataframe(df)
+            if df is not None:
+                # Display video titles
+                selected_video_title = st.selectbox("Select a Video", df['title'])
+                selected_video = df[df['title'] == selected_video_title].iloc[0]
+                col4,col5 = st.columns(2)
+                with col4:
+                    if selected_video is not None:
+                        image = get_picture_from_url(selected_video['thumbnail_link'])
+                        if image:
+                            st.image(image, caption='Thumbnail captured',width = 400, channels="BGR")
+                with col5:
+                    st.write("Title:", selected_video['title'])
+                    category_name = next((key for key, value in categories.items() if value == selected_video['category_id']), 'Unknown Category')
+                    st.write("Category:", category_name)
+                    st.write("Duration:", selected_video['duration'])
+            else:
+                st.error('Failed to retrieve trending videos.')
+        with tab3:
+            with st.container():
+                col6,col7 = st.columns(2)
+                with col6:
+                    show_top_category()
+                with col7:
+                    show_top_duration()
+            with st.container():
+                col8,col9 = st.columns(2)
+                with col8:
+                    show_top_title()
+                with col9:
+                    show_top_titleLength()
 def get_picture_from_url(url):
     try:
         response = requests.get(url)
     except:
         return None
+def show_top_category():
+    """Render a bar chart of predicted probability per category in Streamlit.
+
+    Reads 'topCategory.csv' (columns used: 'category_id', 'predicted_prob'),
+    sorts the rows by ascending 'predicted_prob', numbers them 1..N in a
+    'rank' column and plots rank against probability, coloured by category
+    name. Ids that are not a value of the module-level `categories` mapping
+    are labelled 'Unknown Category'.
+    """
+    topCategory = pd.read_csv('topCategory.csv')
+    # Ascending sort: rank 1 is the row with the lowest predicted_prob.
+    topCategory_sorted = topCategory.sort_values('predicted_prob')
+    topCategory_sorted['rank'] = range(1, len(topCategory_sorted) + 1)
+    # Reverse lookup in `categories` (name -> id): first name whose id matches.
+    topCategory_sorted['category_name'] = topCategory_sorted['category_id'].map(
+        lambda x: next((key for key, value in categories.items() if value == x), 'Unknown Category')
+    )
+    # One palette colour per distinct category id.
+    color_palette = sns.color_palette('Set2', len(topCategory_sorted['category_id'].unique()))
+    # Draw on an explicit Axes instead of relying on pyplot's "current" figure.
+    fig, ax = plt.subplots(figsize=(8, 5))
+    sns.barplot(
+        data=topCategory_sorted,
+        x='rank',
+        y='predicted_prob',
+        hue='category_name',
+        palette=color_palette,
+        ax=ax,
+    )
+    ax.set_xlabel('Rank')
+    ax.set_ylabel('Predicted Probability')
+    ax.set_title('Top Categories')
+    st.pyplot(fig)
+    # Streamlit reruns the script on every interaction; close the figure so
+    # pyplot does not keep accumulating open figures.
+    plt.close(fig)
+def show_top_duration():
+    """Render a scatter plot of predicted probability against duration.
+
+    Reads 'topDuration.csv' (columns used: 'duration', 'predicted_prob'),
+    orders the rows by descending 'predicted_prob' and shows the scatter
+    plot in the Streamlit app.
+    """
+    topDuration = pd.read_csv('topDuration.csv')
+    topDuration_sorted = topDuration.sort_values('predicted_prob', ascending=False)
+    # Use an explicit Figure/Axes rather than handing the pyplot module to
+    # Streamlit, which depends on pyplot's global "current figure".
+    fig, ax = plt.subplots(figsize=(8, 5))
+    ax.scatter(topDuration_sorted['duration'], topDuration_sorted['predicted_prob'])
+    ax.set_xlabel('Duration')
+    ax.set_ylabel('Predicted Probability')
+    ax.set_title('Top Durations')
+    st.pyplot(fig)
+    # Close the figure so repeated script reruns do not leak open figures.
+    plt.close(fig)
+def show_top_title():
+    """Render a horizontal bar chart of title-feature importance scores.
+
+    Reads 'topTitle.csv' (columns used: 'Feature', 'Importance Score') and
+    plots one bar per feature, ordered by ascending importance score.
+    """
+    topTitle = pd.read_csv('topTitle.csv')
+    # Sort ascending by importance score.
+    topTitle_sorted = topTitle.sort_values('Importance Score')
+    # Keep the Figure/Axes returned by subplots and draw on them explicitly.
+    fig, ax = plt.subplots(figsize=(5, 5))
+    ax.barh(topTitle_sorted['Feature'], topTitle_sorted['Importance Score'])
+    ax.set_xlabel('Importance Score')
+    ax.set_ylabel('Feature')
+    ax.set_title('Top Title Features')
+    st.pyplot(fig)
+    # Close the figure so repeated script reruns do not leak open figures.
+    plt.close(fig)
+def round_interval(interval_str):
+    """Return an interval string with both bounds reduced to integers.
+
+    `interval_str` is text such as "(19.999, 40.0]": two comma-separated
+    numbers wrapped in any mix of round/square brackets. Each bound is
+    parsed as a float and converted with int(), i.e. truncated toward zero
+    (not rounded to nearest). The result always uses round brackets,
+    e.g. "(19, 40)".
+
+    Raises ValueError if the text does not contain exactly two numeric parts.
+    """
+    bounds = interval_str.strip('()[]').split(',')
+    lower, upper = (int(float(part)) for part in bounds)
+    return "({}, {})".format(lower, upper)
+def show_top_titleLength():
+    """Render a bar chart of predicted probability per title-length range.
+
+    Reads 'topTitleLength.csv' (columns used: 'titleLength',
+    'predicted_prob'). Each 'titleLength' value is passed through
+    `round_interval` to build the x-axis labels.
+    """
+    topTitleLength = pd.read_csv('topTitleLength.csv')
+    title_length_ranges = topTitleLength['titleLength']
+    predicted_probs = topTitleLength['predicted_prob']
+    rounded_ranges = [round_interval(range_val) for range_val in title_length_ranges]
+    # NOTE: sns.set changes the global seaborn/matplotlib style, so it also
+    # affects figures created after this call.
+    sns.set(style='whitegrid')
+    # Draw on an explicit Figure/Axes; no plt.show() -- the app renders through
+    # st.pyplot, and show() has no place in a server-side script.
+    fig, ax = plt.subplots(figsize=(10, 6))
+    sns.barplot(x=rounded_ranges, y=predicted_probs, ax=ax)
+    ax.set_xlabel('Title Length Range')
+    ax.set_ylabel('Predicted Probability')
+    ax.set_title('Top 5 Ranges for Title Length vs. Predicted Probability')
+    ax.tick_params(axis='x', labelrotation=45)
+    st.pyplot(fig)
+    # Close the figure so repeated script reruns do not leak open figures.
+    plt.close(fig)
 # Function to make predictions
 def predict_trend(title, duration, category_id):
     duration = str(duration)