Akshayram1 committed on
Commit
674b16f
·
verified ·
1 Parent(s): 7ed461b

Update app (3).py

Browse files
Files changed (1) hide show
  1. app (3).py +95 -79
app (3).py CHANGED
@@ -2,47 +2,44 @@ import streamlit as st
2
  from urllib.request import urlopen, Request
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
5
- import plotly
6
  import plotly.express as px
7
- import json # for graph plotting in website
8
- # NLTK VADER for sentiment analysis
9
  from dateutil import parser
10
  import nltk
11
  nltk.downloader.download('vader_lexicon')
12
  from nltk.sentiment.vader import SentimentIntensityAnalyzer
13
-
14
- import subprocess
15
- import os
16
-
17
  import datetime
 
18
 
19
- st.set_page_config(page_title = "Stock News Sentiment Analyzer", layout = "wide")
20
 
 
 
 
 
 
 
 
 
 
21
 
22
  def get_news(ticker):
 
23
  url = finviz_url + ticker
24
- req = Request(url=url,headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
25
  response = urlopen(req)
26
- # Read the contents of the file into 'html'
27
- html = BeautifulSoup(response)
28
- # Find 'news-table' in the Soup and load it into 'news_table'
29
  news_table = html.find(id='news-table')
30
  return news_table
31
-
32
- # parse news into dataframe
33
-
34
 
35
  def parse_news(news_table):
36
  parsed_news = []
37
 
38
  for x in news_table.findAll('tr'):
39
  try:
40
- # Get the headline text
41
  text = x.a.get_text()
42
- # Get the date and time from the first <td> tag
43
  date_scrape = x.td.text.strip().split()
44
 
45
- # Handle cases where only time is present
46
  if len(date_scrape) == 1:
47
  date = datetime.datetime.today().strftime('%Y-%m-%d')
48
  time = date_scrape[0]
@@ -50,103 +47,122 @@ def parse_news(news_table):
50
  date = date_scrape[0]
51
  time = date_scrape[1]
52
 
53
- # Parse the date and time using dateutil.parser
54
  datetime_str = f"{date} {time}"
55
  datetime_parsed = parser.parse(datetime_str)
56
 
57
- # Append the parsed news to the list
58
- parsed_news.append([datetime_parsed, text])
 
59
 
60
  except Exception as e:
61
  print("Error parsing news:", e)
62
  continue
63
 
64
- # Convert the list to a DataFrame
65
- columns = ['datetime', 'headline']
66
  parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
67
 
68
  return parsed_news_df
69
-
70
-
71
-
72
  def score_news(parsed_news_df):
73
- # Instantiate the sentiment intensity analyzer
74
  vader = SentimentIntensityAnalyzer()
75
 
76
- # Iterate through the headlines and get the polarity scores using vader
77
  scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()
78
-
79
- # Convert the 'scores' list of dicts into a DataFrame
80
  scores_df = pd.DataFrame(scores)
81
-
82
- # Join the DataFrames of the news and the list of dicts
83
  parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
84
  parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
85
  parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
86
 
87
  return parsed_and_scored_news
88
 
89
-
90
-
91
  def plot_hourly_sentiment(parsed_and_scored_news, ticker):
92
- # Ensure that only numeric columns are resampled
93
  numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
94
-
95
- # Group by date and ticker columns from scored_news and calculate the mean
96
  mean_scores = numeric_cols.resample('h').mean()
97
-
98
- # Plot a bar chart with Plotly
99
- fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Hourly Sentiment Scores')
100
- return fig # Return the figure to display in the Streamlit app
 
 
 
 
 
 
 
 
 
 
101
 
102
  def plot_daily_sentiment(parsed_and_scored_news, ticker):
103
- # Ensure that only numeric columns are resampled
104
  numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
105
-
106
- # Group by date and ticker columns from scored_news and calculate the mean
107
  mean_scores = numeric_cols.resample('D').mean()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- # Plot a bar chart with Plotly
110
- fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title=ticker + ' Daily Sentiment Scores')
111
- return fig # Return the figure to display in the Streamlit app
112
-
113
-
114
- # for extracting data from finviz
115
- finviz_url = 'https://finviz.com/quote.ashx?t='
116
-
 
117
 
118
  st.header("Stock News Sentiment Analyzer")
119
 
120
  ticker = st.text_input('Enter Stock Ticker', '').upper()
121
 
122
- df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index = [0])
123
-
124
-
125
  try:
126
- st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
127
- news_table = get_news(ticker)
128
- parsed_news_df = parse_news(news_table)
129
- print(parsed_news_df)
130
- parsed_and_scored_news = score_news(parsed_news_df)
131
- fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
132
- fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)
133
-
134
- st.plotly_chart(fig_hourly)
135
- st.plotly_chart(fig_daily)
136
-
137
- description = """
138
- The above chart averages the sentiment scores of {} stock hourly and daily.
139
- The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
140
- The news headlines are obtained from the FinViz website.
141
- Sentiments are given by the nltk.sentiment.vader Python library.
142
- """.format(ticker)
143
-
144
- st.write(description)
145
- st.table(parsed_and_scored_news)
146
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  except Exception as e:
148
- print(str(e))
149
- st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")
150
 
151
  hide_streamlit_style = """
152
  <style>
@@ -154,4 +170,4 @@ hide_streamlit_style = """
154
  footer {visibility: hidden;}
155
  </style>
156
  """
157
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
 
2
  from urllib.request import urlopen, Request
3
  from bs4 import BeautifulSoup
4
  import pandas as pd
 
5
  import plotly.express as px
 
 
6
  from dateutil import parser
7
  import nltk
8
  nltk.downloader.download('vader_lexicon')
9
  from nltk.sentiment.vader import SentimentIntensityAnalyzer
 
 
 
 
10
  import datetime
11
+ import requests
12
 
13
+ st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")
14
 
15
def verify_link(url, timeout=10, retries=3):
    """Return True if ``url`` answers an HTTP HEAD request with a 2xx status.

    Args:
        url: Link to probe.
        timeout: Seconds allowed per attempt (passed to ``requests.head``).
        retries: Maximum number of attempts.

    Returns:
        True as soon as one attempt (redirects followed) yields a status in
        the 200-299 range; False if the link has no http(s) scheme, or if
        every attempt raises or returns another status.
    """
    # A string without an http(s) scheme fails identically on every attempt,
    # so answer immediately instead of repeating the same failing call.
    if isinstance(url, str) and not url.lstrip().lower().startswith(
        ("http://", "https://")
    ):
        return False

    for _ in range(retries):
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=True)
        except requests.RequestException:
            # Connection problems and timeouts: try again.
            continue
        if 200 <= response.status_code < 300:
            return True
    return False
24
 
25
  def get_news(ticker):
26
+ finviz_url = 'https://finviz.com/quote.ashx?t='
27
  url = finviz_url + ticker
28
+ req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
29
  response = urlopen(req)
30
+ html = BeautifulSoup(response, 'html.parser')
 
 
31
  news_table = html.find(id='news-table')
32
  return news_table
 
 
 
33
 
34
  def parse_news(news_table):
35
  parsed_news = []
36
 
37
  for x in news_table.findAll('tr'):
38
  try:
 
39
  text = x.a.get_text()
40
+ link = x.a['href']
41
  date_scrape = x.td.text.strip().split()
42
 
 
43
  if len(date_scrape) == 1:
44
  date = datetime.datetime.today().strftime('%Y-%m-%d')
45
  time = date_scrape[0]
 
47
  date = date_scrape[0]
48
  time = date_scrape[1]
49
 
 
50
  datetime_str = f"{date} {time}"
51
  datetime_parsed = parser.parse(datetime_str)
52
 
53
+ is_valid = verify_link(link)
54
+
55
+ parsed_news.append([datetime_parsed, text, link, is_valid])
56
 
57
  except Exception as e:
58
  print("Error parsing news:", e)
59
  continue
60
 
61
+ columns = ['datetime', 'headline', 'link', 'is_valid']
 
62
  parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
63
 
64
  return parsed_news_df
65
+
 
 
66
  def score_news(parsed_news_df):
 
67
  vader = SentimentIntensityAnalyzer()
68
 
 
69
  scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()
 
 
70
  scores_df = pd.DataFrame(scores)
 
 
71
  parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
72
  parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
73
  parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
74
 
75
  return parsed_and_scored_news
76
 
 
 
77
  def plot_hourly_sentiment(parsed_and_scored_news, ticker):
 
78
  numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
 
 
79
  mean_scores = numeric_cols.resample('h').mean()
80
+
81
+ fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score',
82
+ title=f'{ticker} Hourly Sentiment Scores',
83
+ color='sentiment_score',
84
+ color_continuous_scale=['red', 'yellow', 'green'],
85
+ range_color=[-1, 1])
86
+
87
+ fig.update_layout(coloraxis_colorbar=dict(
88
+ title="Sentiment",
89
+ tickvals=[-1, 0, 1],
90
+ ticktext=["Negative", "Neutral", "Positive"],
91
+ ))
92
+
93
+ return fig
94
 
95
  def plot_daily_sentiment(parsed_and_scored_news, ticker):
 
96
  numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
 
 
97
  mean_scores = numeric_cols.resample('D').mean()
98
+
99
+ fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score',
100
+ title=f'{ticker} Daily Sentiment Scores',
101
+ color='sentiment_score',
102
+ color_continuous_scale=['red', 'yellow', 'green'],
103
+ range_color=[-1, 1])
104
+
105
+ fig.update_layout(coloraxis_colorbar=dict(
106
+ title="Sentiment",
107
+ tickvals=[-1, 0, 1],
108
+ ticktext=["Negative", "Neutral", "Positive"],
109
+ ))
110
+
111
+ return fig
112
 
113
def get_recommendation(sentiment_scores):
    """Summarise the average sentiment score as a textual recommendation.

    Args:
        sentiment_scores: DataFrame with a ``sentiment_score`` column.

    Returns:
        A "Positive" message when the mean is >= 0.05, a "Negative" message
        when it is <= -0.05, a "Neutral" message otherwise, or a no-data
        message when the mean is NaN (empty or all-missing scores).
    """
    import math

    avg_sentiment = sentiment_scores['sentiment_score'].mean()

    # NaN fails both threshold comparisons, so without this guard it would be
    # reported as "Neutral sentiment (Score: nan)".
    if math.isnan(avg_sentiment):
        return "No sentiment data available. Unable to generate a recommendation."

    if avg_sentiment >= 0.05:
        return f"Positive sentiment (Score: {avg_sentiment:.2f}). The recent news suggests a favorable outlook for this stock. Consider buying or holding if you already own it."
    if avg_sentiment <= -0.05:
        return f"Negative sentiment (Score: {avg_sentiment:.2f}). The recent news suggests caution. Consider selling or avoiding this stock for now."
    return f"Neutral sentiment (Score: {avg_sentiment:.2f}). The recent news doesn't show a strong bias. Consider holding if you own the stock, or watch for more definitive trends before making a decision."
122
 
123
  st.header("Stock News Sentiment Analyzer")
124
 
125
  ticker = st.text_input('Enter Stock Ticker', '').upper()
126
 
 
 
 
127
  try:
128
+ st.subheader(f"Sentiment Analysis and Recommendation for {ticker} Stock")
129
+ news_table = get_news(ticker)
130
+ parsed_news_df = parse_news(news_table)
131
+ parsed_and_scored_news = score_news(parsed_news_df)
132
+
133
+ # Generate and display recommendation
134
+ recommendation = get_recommendation(parsed_and_scored_news)
135
+ st.write(recommendation)
136
+
137
+ # Display a disclaimer
138
+ st.warning("Disclaimer: This recommendation is based solely on recent news sentiment and should not be considered as financial advice. Always do your own research and consult with a qualified financial advisor before making investment decisions.")
139
+
140
+ fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
141
+ fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)
142
+
143
+ st.plotly_chart(fig_hourly)
144
+ st.plotly_chart(fig_daily)
145
+
146
+ description = f"""
147
+ The above charts average the sentiment scores of {ticker} stock hourly and daily.
148
+ The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
149
+ The news headlines are obtained from the FinViz website.
150
+ Sentiments are given by the nltk.sentiment.vader Python library.
151
+ Links have been verified for validity.
152
+ """
153
+
154
+ st.write(description)
155
+
156
+ parsed_and_scored_news['link'] = parsed_and_scored_news.apply(
157
+ lambda row: f'<a href="{row["link"]}" target="_blank">{"Valid✅" if row["is_valid"] else "Invalid❌"} Link</a>',
158
+ axis=1
159
+ )
160
+
161
+ st.write(parsed_and_scored_news.drop(columns=['is_valid']).to_html(escape=False), unsafe_allow_html=True)
162
+
163
  except Exception as e:
164
+ print(str(e))
165
+ st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")
166
 
167
  hide_streamlit_style = """
168
  <style>
 
170
  footer {visibility: hidden;}
171
  </style>
172
  """
173
+ st.markdown(hide_streamlit_style, unsafe_allow_html=True)