Spaces: Runtime error
Steven Lu @ MBP-M1-Max committed · Commit 7a410cd
Parent(s): b17a2c7
old bugs fixed

- app.py +548 -0
- requirements.txt +82 -0
app.py
ADDED
@@ -0,0 +1,548 @@
#!/usr/bin/env python3

# from googleapiclient.discovery import build
# from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# import pandas as pd
# import streamlit as st
# import re

# # Set up the YouTube API client
# api_key = 'AIzaSyAtaMM03J79pb2vhBOvsIYMlQ84sx9Fb2U'  # Replace with your API key
# youtube = build('youtube', 'v3', developerKey=api_key)



# # Set up the Reddit API client (PRAW)
# reddit = praw.Reddit(
#     client_id='EhlUF9EavT4rAx42jQshKQ',  # Replace with your Reddit client_id
#     client_secret='Zwc8iLJN8saS3B6booPKjabXw63cZQ',  # Replace with your Reddit client_secret
#     user_agent='FondantOk6255'  # Replace with your user_agent
# )

import streamlit as st
st.set_page_config(page_title="Reddit Comment Analyzer", layout="wide")

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from wordcloud import WordCloud
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
import numpy as np
from nltk.corpus import stopwords
from collections import Counter
import praw
import nltk
from datetime import datetime
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from transformers import pipeline

# Load sentiment analysis pipeline
sentiment_analyzer = pipeline('sentiment-analysis')

# Load summarization pipeline
summarizer = pipeline('summarization')

def analyze_sentiment(comment):
    if len(comment) <= 500:  # Only run the transformer on short comments
        return sentiment_analyzer(comment)[0]['label']
    else:
        return 'neutral'  # Treat overly long comments as neutral

def summarize_text(text):
    if len(text) <= 500:  # Only summarize reasonably short text
        return summarizer(text, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
    else:
        return text[:500] + '...'  # Return a truncated version instead


# Access secrets using st.secrets
reddit_client_id = st.secrets.default["client_id"]
reddit_client_secret = st.secrets.default["client_secret"]
reddit_user_agent = st.secrets.default["user_agent"]

# Initialize Reddit API client using the API keys
reddit = praw.Reddit(
    client_id=reddit_client_id,
    client_secret=reddit_client_secret,
    user_agent=reddit_user_agent
)

# VADER sentiment analyzer setup
analyzer = SentimentIntensityAnalyzer()

# Download stopwords
nltk.download('stopwords')
stop_words_set = ['at', 'how', 'do', 'm', 'during', 'again', 'been', 'dont', 'itself', 'from', 'in',
                  'myself', "wouldn't", 'which', 'than', 'yourselves', 'her', 's', 'further', 'won', 'my',
                  'more', 'would', 'no', 'some', 'yours', "weren't", "haven't", 'over', 'couldn', 'against',
                  "mustn't", 'same', 'was', 'himself', "aren't", 'through', 'shan', 'he', "mightn't", 'only',
                  'on', 't', 'ourselves', 'these', 'other', 'up', 'about', 'hers', 'hasn', 'it', "doesn't",
                  'for', 'wouldn', 'doing', 'not', 'his', 'll', 'you', "couldn't", 'too', 'haven', 'those',
                  'our', 'because', 'im', 'know', 'until', 'to', 'mightn', 'such', 'very', 'needn', 'they',
                  'or', 'as', 'having', 'isn', 'here', 'didn', "isn't", "i'm", 'most', 'did', 'have',
                  "it's", "hadn't", 'by', 'has', 'into', 'there', 'yourself', 'had', 'am', 'y', 'just',
                  'don', 'are', 'does', 'like', 'whom', 'should', 'after', 'mustn', 'once', 'below',
                  'him', 'who', "you're", 'them', 'why', 'your', "you've", "you'll", 'is', "don't",
                  'aren', 'when', 'so', 'can', 'being', 'and', "should've", 'that', 'above',
                  "didn't", 'hadn', 'doesn', 've', 'ma', 'before', 'out', 'the', 'if', 'where',
                  "shan't", 'under', 'each', 'ain', 'what', "shouldn't", 'down', 'now', 'weren',
                  'youre', 'a', 'with', "hasn't", 'herself', 'get', 're', "she's", 'of', 'we',
                  "wasn't", 'their', 'theirs', 'but', 'o', "that'll", 'its', 'own', 'wasn',
                  'all', 'nor', "you'd", 'shouldn', 'both', 'me', 'd', 'between', 'be', 'an',
                  'any', 'i', 'she', 'this', 'then', "won't", 'were', 'will', "needn't", 'off',
                  'few', 'themselves', 'ours', 'while']

# Combine custom stopwords with NLTK stopwords
stop_words = list(set(stopwords.words('english')).union(stop_words_set))

# Set up the TfidfVectorizer using the combined stop words
# vectorizer = TfidfVectorizer(stop_words=list(stop_words))

# Convert the set to a list before passing to TfidfVectorizer
# stop_words_list = list(stop_words)

# Verify that stop_words_list is a list of strings
# st.write(stop_words_list[:10])  # Print first 10 stop words for verification

# Use the vectorizer and pass the stop words list
try:
    vectorizer = TfidfVectorizer(stop_words=stop_words, max_features=5)
    print("Stop words applied:", vectorizer.get_stop_words())
    st.write("TfidfVectorizer initialized successfully!")
except Exception as e:
    st.error(f"Error initializing TfidfVectorizer: {e}")


# Streamlit app structure

st.title("Reddit Keyword-Based Comment Analyzer")

@st.cache
def fetch_reddit_data(query, max_results=50, min_score=10):
    posts = reddit.subreddit('all').search(query, limit=max_results)
    comments, timestamps, scores = [], [], []
    for post in posts:
        post.comments.replace_more(limit=0)
        for comment in post.comments.list():
            if isinstance(comment, praw.models.Comment) and comment.body and comment.created_utc:
                if comment.score >= min_score:  # Filter comments by minimum score (upvotes)
                    comments.append(comment.body)
                    timestamps.append(pd.to_datetime(comment.created_utc, unit='s'))
                    scores.append(comment.score)  # Store the comment score for reference
    return comments, timestamps, scores


def preprocess_text(text):
    # Normalize contractions and remove non-alphabetic characters
    text = re.sub(r"[^\w\s]", '', text.lower())  # Removes punctuation and converts to lower case
    text = re.sub(r'\s+', ' ', text)  # Removes excess whitespace
    return text

# Note: this redefines analyze_sentiment above; the app actually uses the VADER compound score.
def analyze_sentiment(comments):
    return [analyzer.polarity_scores(comment)['compound'] for comment in comments]

def generate_wordcloud(comments):
    filtered_words = ' '.join([word for word in ' '.join(comments).split() if word not in stop_words])
    return WordCloud(width=400, height=400, background_color='white').generate(filtered_words)

# Extract features for keywords
def extract_features(comments):
    vectorizer = TfidfVectorizer(stop_words=stop_words, max_features=50)
    X = vectorizer.fit_transform(comments)
    return vectorizer.get_feature_names_out(), X.sum(axis=0).A1

query = st.text_input("Enter a keyword to search for Reddit comments", value="data analyst bootcamp online course")
start_date = st.date_input("Start Date", value=pd.to_datetime("2024-01-01").date())
end_date = st.date_input("End Date", value=pd.to_datetime("today").date())

if st.button("Analyze"):
    comments, timestamps, score = fetch_reddit_data(query, max_results=50, min_score=10)
    print(f'Fetched {len(comments)} comments.')
    if not comments:
        st.warning("No comments found for this search query.")
    else:
        # Clean the comments before passing them to TfidfVectorizer
        cleaned_comments = [preprocess_text(comment) for comment in comments]
        print("Sample of cleaned comments:")
        print(cleaned_comments[:5])
        sentiment_scores = analyze_sentiment(cleaned_comments)
        df = pd.DataFrame({
            'comment': cleaned_comments,
            'sentiment': sentiment_scores,
            'created_at': timestamps
        })

        # Ensure created_at is in datetime format
        df['created_at'] = pd.to_datetime(df['created_at'])

        # Set the datetime index for resampling
        df.set_index('created_at', inplace=True)

        # Filter by date range
        df = df[(df.index >= pd.Timestamp(start_date)) & (df.index <= pd.Timestamp(end_date))]

        df['sentiment_category'] = df['sentiment'].apply(lambda x: 'positive' if x > 0 else 'negative' if x < 0 else 'neutral')

        # Save results in session state
        st.session_state.df = df
        st.session_state.cleaned_comments = cleaned_comments

# If results are in session state, retrieve them
if 'df' in st.session_state:
    df = st.session_state.df
    cleaned_comments = st.session_state.cleaned_comments

    st.subheader("Key Metrics")
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Total Comments", len(df))
    col2.metric("Positive", len(df[df['sentiment_category'] == 'positive']))
    col3.metric("Neutral", len(df[df['sentiment_category'] == 'neutral']))
    col4.metric("Negative", len(df[df['sentiment_category'] == 'negative']))

    # Sentiment Distribution
    st.subheader("Sentiment Distribution")
    sentiment_counts = df['sentiment_category'].value_counts()
    fig1, ax1 = plt.subplots()
    ax1.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90)
    ax1.axis('equal')

    # Top Comments Distribution
    # Keywords Affecting Sentiment
    st.subheader("Top Keywords Affecting Sentiment")
    vectorizer = TfidfVectorizer(stop_words=stop_words, max_features=50)
    X = vectorizer.fit_transform(df['comment'])
    features = vectorizer.get_feature_names_out()
    scores = np.asarray(X.mean(axis=0)).flatten()

    keywords_df = pd.DataFrame({'Keyword': features, 'Score': scores})
    top_keywords = keywords_df.sort_values(by='Score', ascending=False).head(10)

    fig2, ax2 = plt.subplots(figsize=(10, 5))
    top_keywords.plot(kind='bar', x='Keyword', y='Score', ax=ax2, color='darkorange')
    ax2.set_xlabel("Keyword")
    ax2.set_ylabel("TF-IDF Score")
    ax2.set_title("Top Keywords Affecting Sentiment")
    plt.xticks(rotation=45, ha='right')

    # Display the charts side by side
    col1, col2 = st.columns(2)
    with col1:
        st.pyplot(fig1)
    with col2:
        st.pyplot(fig2)

    # Visualizations
    # Word cloud and common words bar chart side by side
    st.subheader("Word Cloud and Feature Importance Analysis")
    col1, col2 = st.columns(2)

    with col1:
        filtered_words = ' '.join([word for word in ' '.join(cleaned_comments).split() if word.lower() not in stop_words])
        wordcloud = WordCloud(width=400, height=400, background_color='white').generate(filtered_words)
        plt.figure(figsize=(5, 4))
        plt.imshow(wordcloud, interpolation='bilinear')
        plt.axis('off')
        st.pyplot(plt)

    with col2:
        # Feature Importance Analysis
        feature_names, feature_counts = extract_features(cleaned_comments)
        feature_df = pd.DataFrame({'Feature': feature_names, 'Count': feature_counts})
        feature_df = feature_df.sort_values(by='Count', ascending=False).head(10)

        fig3, ax3 = plt.subplots(figsize=(10, 5))
        feature_plot = feature_df.plot(kind='bar', x='Feature', y='Count', ax=ax3, color='salmon')
        ax3.set_xlabel("Feature")
        ax3.set_ylabel("Frequency")
        ax3.set_title("Top Keywords Impacting Sentiment")
        plt.xticks(rotation=45, ha='right')
        st.pyplot(fig3)

    st.subheader("Sentiment Over Time")
    sentiment_over_time = df['sentiment'].resample('W').mean()  # Resample by week
    fig2, ax2 = plt.subplots(figsize=(10, 5))
    ax2.plot(sentiment_over_time.index, sentiment_over_time.values, marker='o')
    ax2.set_xlabel("Date")
    ax2.set_ylabel("Average Sentiment")
    st.pyplot(fig2)


    # Sentiment Distribution by Hour of Day
    st.subheader("Sentiment Distribution by Hour of Day")
    df['hour'] = df.index.hour
    sentiment_by_hour = df.groupby('hour')['sentiment'].mean()
    fig5, ax5 = plt.subplots(figsize=(10, 5))
    ax5.bar(sentiment_by_hour.index, sentiment_by_hour.values, color='skyblue')
    ax5.set_xlabel("Hour of Day")
    ax5.set_ylabel("Average Sentiment")
    ax5.set_title("Average Sentiment by Hour of Day")
    st.pyplot(fig5)

    # Sentiment Heatmap
    st.subheader("Sentiment Heatmap by Hour and Day")
    df['day_of_week'] = df.index.day_name()
    df['hour'] = df.index.hour
    heatmap_data = df.groupby(['day_of_week', 'hour'])['sentiment'].mean().unstack()
    fig4, ax4 = plt.subplots(figsize=(12, 6))
    sns.heatmap(heatmap_data, cmap='coolwarm', annot=True, fmt='.2f', ax=ax4)
    ax4.set_xlabel("Hour of Day")
    ax4.set_ylabel("Day of Week")
    ax4.set_title("Sentiment Heatmap by Hour and Day")
    st.pyplot(fig4)

    # Filter comments by sentiment
    st.subheader("Filter Comments by Sentiment")
    sentiment_option = st.selectbox("Choose Sentiment", ['positive', 'neutral', 'negative'])
    filtered_comments = df[df['sentiment_category'] == sentiment_option]
    if not filtered_comments.empty:
        st.write(filtered_comments[['comment', 'sentiment_category']].head())
    else:
        st.write("No comments found for the selected sentiment.")

    # Display raw data
    st.subheader("Raw Data")
    st.write(df.head())

    # Enhanced Top Sentiment-Related Keywords
    st.subheader("Top Sentiment-Related Keywords")
    keywords = ['excellent', 'good', 'great', 'bad', 'terrible']
    filtered_comments_with_keywords = [comment for comment in cleaned_comments if any(keyword in comment for keyword in keywords)]
    if filtered_comments_with_keywords:
        st.write(f"Found {len(filtered_comments_with_keywords)} comments containing sentiment-related keywords.")
        for i, comment in enumerate(filtered_comments_with_keywords[:10]):
            st.write(f"**Comment {i+1}:** {comment}")
    else:
        st.write("No comments with sentiment-related keywords found.")



# import streamlit as st
# from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# from wordcloud import WordCloud
# import pandas as pd
# import matplotlib.pyplot as plt
# import seaborn as sns
# import re
# import numpy as np
# from nltk.corpus import stopwords
# from collections import Counter
# import praw
# import nltk
# from datetime import datetime
# from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
# from transformers import pipeline

# # Load sentiment analysis and summarization pipelines
# sentiment_analyzer = pipeline('sentiment-analysis')
# summarizer = pipeline('summarization')

# def analyze_sentiment(comment):
#     if len(comment) <= 500:  # Skip long comments
#         return sentiment_analyzer(comment)[0]['label']
#     else:
#         return 'neutral'

# def summarize_text(text):
#     if len(text) <= 500:  # Skip long text for summarization
#         return summarizer(text, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
#     else:
#         return text[:500] + '...'  # Return truncated version

# # Initialize Reddit API client
# reddit = praw.Reddit(
#     client_id='EhlUF9EavT4rAx42jQshKQ',
#     client_secret='Zwc8iLJN8saS3B6booPKjabXw63cZQ',
#     user_agent='FondantOk6255'
# )

# # VADER sentiment analyzer setup
# analyzer = SentimentIntensityAnalyzer()

# # Download stopwords
# nltk.download('stopwords')
# stop_words_set = set(stopwords.words('english'))
# stop_words_list = list(stop_words_set)

# vectorizer = TfidfVectorizer(stop_words=stop_words_list)

# # Streamlit app structure
# st.set_page_config(page_title="Reddit Comment Analyzer", layout="wide")
# st.title("Reddit Keyword-Based Comment Analyzer")

# # @st.cache
# # def fetch_reddit_data(query, max_results=50):
# #     posts = reddit.subreddit('all').search(query, limit=max_results)
# #     comments, timestamps = [], []
# #     for post in posts:
# #         post.comments.replace_more(limit=0)
# #         for comment in post.comments.list():
# #             if isinstance(comment, praw.models.Comment) and comment.body and comment.created_utc:
# #                 comments.append(comment.body)
# #                 timestamps.append(pd.to_datetime(comment.created_utc, unit='s'))
# #     return comments, timestamps

# @st.cache
# def fetch_reddit_data(query, max_posts=50, max_comments_per_post=10):
#     posts = reddit.subreddit('all').search(query, limit=max_posts)
#     comments, timestamps = [], []

#     for post in posts:
#         post.comments.replace_more(limit=0)  # Replace MoreComments with actual comments
#         comment_count = 0
#         for comment in post.comments.list():
#             if comment_count >= max_comments_per_post:  # Stop after max_comments_per_post comments
#                 break
#             if isinstance(comment, praw.models.Comment) and comment.body and comment.created_utc:
#                 comments.append(comment.body)
#                 timestamps.append(pd.to_datetime(comment.created_utc, unit='s'))
#                 comment_count += 1

#         # Optional: Stop after reaching max_comments total
#         if len(comments) >= max_posts * max_comments_per_post:
#             break

#     return comments, timestamps


# def preprocess_text(text):
#     text = re.sub(r'[^a-zA-Z0-9\s]', '', text.lower())
#     text = re.sub(r'\s+', ' ', text)
#     return text.strip()

# def analyze_sentiment_vader(comments):
#     return [analyzer.polarity_scores(comment)['compound'] for comment in comments]

# def generate_wordcloud(comments):
#     filtered_words = ' '.join([word for word in ' '.join(comments).split() if word not in stop_words_list])
#     return WordCloud(width=400, height=400, background_color='white').generate(filtered_words)

# def extract_features(comments):
#     vectorizer = CountVectorizer(stop_words=stop_words_list, max_features=50)
#     X = vectorizer.fit_transform(comments)
#     return vectorizer.get_feature_names_out(), X.sum(axis=0).A1

# query = st.text_input("Enter a keyword to search for Reddit comments", value="data analyst bootcamp online course")
# start_date = st.date_input("Start Date", value=pd.to_datetime("2024-01-01").date())
# end_date = st.date_input("End Date", value=pd.to_datetime("today").date())

# if st.button("Analyze"):
#     comments, timestamps = fetch_reddit_data(query)

#     print('done fetching', len(comments))

#     if not comments:
#         st.warning("No comments found for this search query.")
#     else:
#         cleaned_comments = [preprocess_text(comment) for comment in comments]
#         print('preprocessed')
#         sentiment_scores = analyze_sentiment(cleaned_comments)
#         df = pd.DataFrame({
#             'comment': cleaned_comments,
#             'sentiment': sentiment_scores,
#             'created_at': timestamps
#         })
#         print('ANALYZED')

#         df['created_at'] = pd.to_datetime(df['created_at'])
#         df.set_index('created_at', inplace=True)
#         df = df[(df.index >= pd.Timestamp(start_date)) & (df.index <= pd.Timestamp(end_date))]

#         # Ensure the 'sentiment' column contains numeric values (floats)
#         df['sentiment'] = pd.to_numeric(df['sentiment'], errors='coerce')

#         # Apply sentiment categorization
#         df['sentiment_category'] = df['sentiment'].apply(lambda x: 'positive' if x > 0 else 'negative' if x < 0 else 'neutral')

#         # df['sentiment_category'] = df['sentiment'].apply(lambda x: 'positive' if x > 0 else 'negative' if x < 0 else 'neutral')

#         st.session_state.df = df
#         st.session_state.cleaned_comments = cleaned_comments

# if 'df' in st.session_state:
#     df = st.session_state.df
#     cleaned_comments = st.session_state.cleaned_comments

#     st.subheader("Key Metrics")
#     col1, col2, col3, col4 = st.columns(4)
#     col1.metric("Total Comments", len(df))
#     col2.metric("Positive", len(df[df['sentiment_category'] == 'positive']))
#     col3.metric("Neutral", len(df[df['sentiment_category'] == 'neutral']))
#     col4.metric("Negative", len(df[df['sentiment_category'] == 'negative']))

#     st.subheader("Sentiment Distribution")
#     sentiment_counts = df['sentiment_category'].value_counts()
#     fig1, ax1 = plt.subplots()
#     ax1.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90)
#     ax1.axis('equal')

#     st.subheader("Top Keywords Affecting Sentiment")
#     vectorizer = TfidfVectorizer(stop_words=stop_words_list, max_features=50)
#     X = vectorizer.fit_transform(df['comment'])
#     features = vectorizer.get_feature_names_out()
#     scores = np.asarray(X.mean(axis=0)).flatten()

#     keywords_df = pd.DataFrame({'Keyword': features, 'Score': scores})
#     top_keywords = keywords_df.sort_values(by='Score', ascending=False).head(10)

#     fig2, ax2 = plt.subplots(figsize=(10, 5))
#     top_keywords.plot(kind='bar', x='Keyword', y='Score', ax=ax2, color='darkorange')
#     ax2.set_xlabel("Keyword")
#     ax2.set_ylabel("TF-IDF Score")
#     ax2.set_title("Top Keywords Affecting Sentiment")
#     plt.xticks(rotation=45, ha='right')

#     col1, col2 = st.columns(2)
#     with col1:
#         st.pyplot(fig1)
#     with col2:
#         st.pyplot(fig2)

#     st.subheader("Word Cloud and Feature Importance Analysis")
#     col1, col2 = st.columns(2)

#     with col1:
#         filtered_words = ' '.join([word for word in ' '.join(cleaned_comments).split() if word.lower() not in stop_words_list])
#         wordcloud = WordCloud(width=400, height=400, background_color='white').generate(filtered_words)
#         plt.figure(figsize=(5, 4))
#         plt.imshow(wordcloud, interpolation='bilinear')
#         plt.axis('off')
#         st.pyplot(plt)

#     with col2:
#         feature_names, feature_counts = extract_features(cleaned_comments)
#         feature_df = pd.DataFrame({'Feature': feature_names, 'Count': feature_counts})
#         feature_df = feature_df.sort_values(by='Count', ascending=False).head(10)

#         fig3, ax3 = plt.subplots(figsize=(10, 5))
#         feature_df.plot(kind='bar', x='Feature', y='Count', ax=ax3, color='salmon')
#         ax3.set_xlabel("Feature")
#         ax3.set_ylabel("Frequency")
#         ax3.set_title("Top Keywords Impacting Sentiment")
#         plt.xticks(rotation=45, ha='right')
#         st.pyplot(fig3)

#     st.subheader("Sentiment Over Time")
#     sentiment_over_time = df['sentiment'].resample('W').mean()
#     fig4, ax4 = plt.subplots(figsize=(10, 5))
#     ax4.plot(sentiment_over_time.index, sentiment_over_time.values, marker='o')
#     ax4.set_xlabel("Date")
#     ax4.set_ylabel("Average Sentiment")
#     st.pyplot(fig4)

#     st.subheader("Sentiment Distribution by Hour of Day")
#     df['hour'] = df.index.hour
#     sentiment_by_hour = df.groupby('hour')['sentiment'].mean()
#     fig5, ax5 = plt.subplots(figsize=(10, 5))
#     ax5.plot(sentiment_by_hour.index, sentiment_by_hour.values, marker='o')
#     ax5.set_xlabel("Hour of Day")
#     ax5.set_ylabel("Average Sentiment")
#     st.pyplot(fig5)

#     st.subheader("Comment Summaries")
#     for comment in cleaned_comments:
#         st.write(summarize_text(comment))
requirements.txt
ADDED
@@ -0,0 +1,82 @@
altair==4.2.0
attrs==24.2.0
blinker==1.8.2
cachetools==5.4.0
certifi==2024.7.4
charset-normalizer==3.3.2
click==8.1.7
contourpy==1.2.1
cycler==0.12.1
entrypoints==0.4
filelock==3.15.4
fonttools==4.53.1
fsspec==2024.6.1
gitdb==4.0.11
GitPython==3.1.43
huggingface-hub==0.24.5
idna==3.7
importlib_metadata==8.2.0
importlib_resources==6.4.3
Jinja2==3.1.4
joblib==1.4.2
jsonschema==4.23.0
jsonschema-specifications==2023.12.1
kiwisolver==1.4.5
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.9.2
mdurl==0.1.2
mpmath==1.3.0
narwhals==1.4.2
networkx==3.2.1
nltk==3.8.1
numpy==1.26.4
packaging==24.1
pandas==2.2.2
pillow==10.4.0
praw==7.7.1
prawcore==2.4.0
protobuf==3.20.3
pyarrow==17.0.0
pydeck==0.9.1
Pygments==2.18.0
Pympler==1.1
pyparsing==3.1.2
python-dateutil==2.9.0.post0
pytz==2024.1
PyYAML==6.0.2
referencing==0.35.1
regex==2024.7.24
requests==2.32.3
rich==13.7.1
rpds-py==0.20.0
safetensors==0.4.4
scikit-learn==1.5.1
scipy==1.13.1
seaborn==0.13.2
semver==3.0.2
six==1.16.0
sklearn==0.0
smmap==5.0.1
streamlit==1.12.0
sympy==1.13.2
threadpoolctl==3.5.0
tokenizers==0.19.1
toml==0.10.2
toolz==0.12.1
torch==2.2.2
torchaudio==2.2.2
torchvision==0.17.2
tornado==6.4.1
tqdm==4.66.5
transformers==4.44.0
typing_extensions==4.12.2
tzdata==2024.1
tzlocal==5.2
update-checker==0.18.0
urllib3==2.2.2
vaderSentiment==3.3.2
validators==0.33.0
websocket-client==1.8.0
wordcloud==1.9.3
zipp==3.20.0