Akshayram1 committed on
Commit
10bfc4e
·
verified ·
1 Parent(s): 073f2cc

Update app (3).py

Browse files
Files changed (1) hide show
  1. app (3).py +43 -36
app (3).py CHANGED
@@ -6,6 +6,7 @@ import plotly
6
  import plotly.express as px
7
  import json # for graph plotting in website
8
  # NLTK VADER for sentiment analysis
 
9
  import nltk
10
  nltk.downloader.download('vader_lexicon')
11
  from nltk.sentiment.vader import SentimentIntensityAnalyzer
@@ -15,7 +16,7 @@ import os
15
 
16
  import datetime
17
 
18
- st.set_page_config(page_title = "Akshay's Stock News Sentiment Analyzer", layout = "wide")
19
 
20
 
21
  def get_news(ticker):
@@ -29,40 +30,41 @@ def get_news(ticker):
29
  return news_table
30
 
31
  # parse news into dataframe
 
 
32
  def parse_news(news_table):
33
  parsed_news = []
34
- today_string = datetime.datetime.today().strftime('%Y-%m-%d')
35
 
36
  for x in news_table.findAll('tr'):
37
  try:
38
- # read the text from each tr tag into text
39
- # get text from a only
40
- text = x.a.get_text()
41
- # splite text in the td tag into a list
42
- date_scrape = x.td.text.split()
43
- # if the length of 'date_scrape' is 1, load 'time' as the only element
44
-
45
  if len(date_scrape) == 1:
 
46
  time = date_scrape[0]
47
-
48
- # else load 'date' as the 1st element and 'time' as the second
49
  else:
50
  date = date_scrape[0]
51
  time = date_scrape[1]
 
 
 
 
 
 
 
52
 
53
- # Append ticker, date, time and headline as a list to the 'parsed_news' list
54
- parsed_news.append([date, time, text])
55
- except:
56
- pass
57
-
58
- # Set column names
59
- columns = ['date', 'time', 'headline']
60
- # Convert the parsed_news list into a DataFrame called 'parsed_and_scored_news'
61
- parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
62
- # Create a pandas datetime object from the strings in 'date' and 'time' column
63
- parsed_news_df['date'] = parsed_news_df['date'].replace("Today", today_string)
64
- parsed_news_df['datetime'] = pd.to_datetime(parsed_news_df['date'] + ' ' + parsed_news_df['time'])
65
-
66
  return parsed_news_df
67
 
68
 
@@ -80,35 +82,40 @@ def score_news(parsed_news_df):
80
  # Join the DataFrames of the news and the list of dicts
81
  parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
82
  parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
83
- parsed_and_scored_news = parsed_and_scored_news.drop(['date', 'time'], 1)
84
  parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
85
 
86
  return parsed_and_scored_news
87
 
88
 
 
89
  def plot_hourly_sentiment(parsed_and_scored_news, ticker):
90
-
 
 
91
  # Group by date and ticker columns from scored_news and calculate the mean
92
- mean_scores = parsed_and_scored_news.resample('H').mean()
93
 
94
- # Plot a bar chart with plotly
95
- fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Hourly Sentiment Scores')
96
- return fig # instead of using fig.show(), we return fig and turn it into a graphjson object for displaying in web page later
97
 
98
  def plot_daily_sentiment(parsed_and_scored_news, ticker):
99
-
 
 
100
  # Group by date and ticker columns from scored_news and calculate the mean
101
- mean_scores = parsed_and_scored_news.resample('D').mean()
 
 
 
 
102
 
103
- # Plot a bar chart with plotly
104
- fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Daily Sentiment Scores')
105
- return fig # instead of using fig.show(), we return fig and turn it into a graphjson object for displaying in web page later
106
 
107
  # for extracting data from finviz
108
  finviz_url = 'https://finviz.com/quote.ashx?t='
109
 
110
 
111
- st.header("Bohmian's Stock News Sentiment Analyzer")
112
 
113
  ticker = st.text_input('Enter Stock Ticker', '').upper()
114
 
 
6
  import plotly.express as px
7
  import json # for graph plotting in website
8
  # NLTK VADER for sentiment analysis
9
+ from dateutil import parser
10
  import nltk
11
  nltk.downloader.download('vader_lexicon')
12
  from nltk.sentiment.vader import SentimentIntensityAnalyzer
 
16
 
17
  import datetime
18
 
19
+ st.set_page_config(page_title = "Stock News Sentiment Analyzer", layout = "wide")
20
 
21
 
22
  def get_news(ticker):
 
30
  return news_table
31
 
32
  # parse news into dataframe
33
+
34
+
35
  def parse_news(news_table):
36
  parsed_news = []
 
37
 
38
  for x in news_table.findAll('tr'):
39
  try:
40
+ # Get the headline text
41
+ text = x.a.get_text()
42
+ # Get the date and time from the first <td> tag
43
+ date_scrape = x.td.text.strip().split()
44
+
45
+ # Handle cases where only time is present
 
46
  if len(date_scrape) == 1:
47
+ date = datetime.datetime.today().strftime('%Y-%m-%d')
48
  time = date_scrape[0]
 
 
49
  else:
50
  date = date_scrape[0]
51
  time = date_scrape[1]
52
+
53
+ # Parse the date and time using dateutil.parser
54
+ datetime_str = f"{date} {time}"
55
+ datetime_parsed = parser.parse(datetime_str)
56
+
57
+ # Append the parsed news to the list
58
+ parsed_news.append([datetime_parsed, text])
59
 
60
+ except Exception as e:
61
+ print("Error parsing news:", e)
62
+ continue
63
+
64
+ # Convert the list to a DataFrame
65
+ columns = ['datetime', 'headline']
66
+ parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
67
+
 
 
 
 
 
68
  return parsed_news_df
69
 
70
 
 
82
  # Join the DataFrames of the news and the list of dicts
83
  parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
84
  parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
 
85
  parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
86
 
87
  return parsed_and_scored_news
88
 
89
 
90
+
91
  def plot_hourly_sentiment(parsed_and_scored_news, ticker):
92
+ # Ensure that only numeric columns are resampled
93
+ numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
94
+
95
  # Group by date and ticker columns from scored_news and calculate the mean
96
+ mean_scores = numeric_cols.resample('h').mean()
97
 
98
+ # Plot a bar chart with Plotly
99
+ fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Hourly Sentiment Scores')
100
+ return fig # Return the figure to display in the Streamlit app
101
 
102
  def plot_daily_sentiment(parsed_and_scored_news, ticker):
103
+ # Ensure that only numeric columns are resampled
104
+ numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
105
+
106
  # Group by date and ticker columns from scored_news and calculate the mean
107
+ mean_scores = numeric_cols.resample('D').mean()
108
+
109
+ # Plot a bar chart with Plotly
110
+ fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Daily Sentiment Scores')
111
+ return fig # Return the figure to display in the Streamlit app
112
 
 
 
 
113
 
114
  # for extracting data from finviz
115
  finviz_url = 'https://finviz.com/quote.ashx?t='
116
 
117
 
118
+ st.header("Stock News Sentiment Analyzer")
119
 
120
  ticker = st.text_input('Enter Stock Ticker', '').upper()
121