Akshayram1 committed: Update app (3).py
app (3).py CHANGED (+95 -79)
@@ -2,47 +2,44 @@ import streamlit as st
 from urllib.request import urlopen, Request
 from bs4 import BeautifulSoup
 import pandas as pd
-import plotly
 import plotly.express as px
-import json # for graph plotting in website
-# NLTK VADER for sentiment analysis
 from dateutil import parser
 import nltk
 nltk.downloader.download('vader_lexicon')
 from nltk.sentiment.vader import SentimentIntensityAnalyzer
-import subprocess
-import os
 import datetime
+import requests

-st.set_page_config(page_title
+st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")
+
+def verify_link(url, timeout=10, retries=3):
+    for _ in range(retries):
+        try:
+            response = requests.head(url, timeout=timeout, allow_redirects=True)
+            if 200 <= response.status_code < 300:
+                return True
+        except requests.RequestException:
+            continue
+    return False

 def get_news(ticker):
+    finviz_url = 'https://finviz.com/quote.ashx?t='
     url = finviz_url + ticker
-    req = Request(url=url,headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
+    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
     response = urlopen(req)
-    html = BeautifulSoup(response)
-    # Find 'news-table' in the Soup and load it into 'news_table'
+    html = BeautifulSoup(response, 'html.parser')
     news_table = html.find(id='news-table')
     return news_table
-
-# parse news into dataframe

 def parse_news(news_table):
     parsed_news = []

     for x in news_table.findAll('tr'):
         try:
-            # Get the headline text
             text = x.a.get_text()
+            link = x.a['href']
             date_scrape = x.td.text.strip().split()

-            # Handle cases where only time is present
             if len(date_scrape) == 1:
                 date = datetime.datetime.today().strftime('%Y-%m-%d')
                 time = date_scrape[0]
@@ -50,103 +47,122 @@ def parse_news(news_table):
                 date = date_scrape[0]
                 time = date_scrape[1]

-            # Parse the date and time using dateutil.parser
             datetime_str = f"{date} {time}"
             datetime_parsed = parser.parse(datetime_str)

+            is_valid = verify_link(link)
+
+            parsed_news.append([datetime_parsed, text, link, is_valid])

         except Exception as e:
             print("Error parsing news:", e)
             continue

-    columns = ['datetime', 'headline']
+    columns = ['datetime', 'headline', 'link', 'is_valid']
     parsed_news_df = pd.DataFrame(parsed_news, columns=columns)

     return parsed_news_df

 def score_news(parsed_news_df):
-    # Instantiate the sentiment intensity analyzer
     vader = SentimentIntensityAnalyzer()

-    # Iterate through the headlines and get the polarity scores using vader
     scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()
-    # Convert the 'scores' list of dicts into a DataFrame
     scores_df = pd.DataFrame(scores)
-    # Join the DataFrames of the news and the list of dicts
     parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
     parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
     parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})

     return parsed_and_scored_news

 def plot_hourly_sentiment(parsed_and_scored_news, ticker):
-    # Ensure that only numeric columns are resampled
     numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
-    # Group by date and ticker columns from scored_news and calculate the mean
     mean_scores = numeric_cols.resample('h').mean()
+
+    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score',
+                 title=f'{ticker} Hourly Sentiment Scores',
+                 color='sentiment_score',
+                 color_continuous_scale=['red', 'yellow', 'green'],
+                 range_color=[-1, 1])
+
+    fig.update_layout(coloraxis_colorbar=dict(
+        title="Sentiment",
+        tickvals=[-1, 0, 1],
+        ticktext=["Negative", "Neutral", "Positive"],
+    ))
+
+    return fig

 def plot_daily_sentiment(parsed_and_scored_news, ticker):
-    # Ensure that only numeric columns are resampled
     numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
-    # Group by date and ticker columns from scored_news and calculate the mean
     mean_scores = numeric_cols.resample('D').mean()
+
+    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score',
+                 title=f'{ticker} Daily Sentiment Scores',
+                 color='sentiment_score',
+                 color_continuous_scale=['red', 'yellow', 'green'],
+                 range_color=[-1, 1])
+
+    fig.update_layout(coloraxis_colorbar=dict(
+        title="Sentiment",
+        tickvals=[-1, 0, 1],
+        ticktext=["Negative", "Neutral", "Positive"],
+    ))
+
+    return fig

+def get_recommendation(sentiment_scores):
+    avg_sentiment = sentiment_scores['sentiment_score'].mean()
+
+    if avg_sentiment >= 0.05:
+        return f"Positive sentiment (Score: {avg_sentiment:.2f}). The recent news suggests a favorable outlook for this stock. Consider buying or holding if you already own it."
+    elif avg_sentiment <= -0.05:
+        return f"Negative sentiment (Score: {avg_sentiment:.2f}). The recent news suggests caution. Consider selling or avoiding this stock for now."
+    else:
+        return f"Neutral sentiment (Score: {avg_sentiment:.2f}). The recent news doesn't show a strong bias. Consider holding if you own the stock, or watch for more definitive trends before making a decision."

 st.header("Stock News Sentiment Analyzer")

 ticker = st.text_input('Enter Stock Ticker', '').upper()

-df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index = [0])
-
 try:
+    st.subheader(f"Sentiment Analysis and Recommendation for {ticker} Stock")
+    news_table = get_news(ticker)
+    parsed_news_df = parse_news(news_table)
+    parsed_and_scored_news = score_news(parsed_news_df)
+
+    # Generate and display recommendation
+    recommendation = get_recommendation(parsed_and_scored_news)
+    st.write(recommendation)
+
+    # Display a disclaimer
+    st.warning("Disclaimer: This recommendation is based solely on recent news sentiment and should not be considered as financial advice. Always do your own research and consult with a qualified financial advisor before making investment decisions.")
+
+    fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
+    fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)
+
+    st.plotly_chart(fig_hourly)
+    st.plotly_chart(fig_daily)
+
+    description = f"""
+        The above charts average the sentiment scores of {ticker} stock hourly and daily.
+        The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
+        The news headlines are obtained from the FinViz website.
+        Sentiments are given by the nltk.sentiment.vader Python library.
+        Links have been verified for validity.
+    """
+
+    st.write(description)
+
+    parsed_and_scored_news['link'] = parsed_and_scored_news.apply(
+        lambda row: f'<a href="{row["link"]}" target="_blank">{"Valid✅" if row["is_valid"] else "Invalid❌"} Link</a>',
+        axis=1
+    )
+
+    st.write(parsed_and_scored_news.drop(columns=['is_valid']).to_html(escape=False), unsafe_allow_html=True)
+
 except Exception as e:
+    print(str(e))
+    st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")

 hide_streamlit_style = """
 <style>
@@ -154,4 +170,4 @@ hide_streamlit_style = """
 footer {visibility: hidden;}
 </style>
 """
-st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+st.markdown(hide_streamlit_style, unsafe_allow_html=True)
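The main functional addition in this commit is the verify_link helper, which vets each headline URL with an HTTP HEAD request before the link is rendered. A minimal usage sketch, not part of the commit itself (the URLs below are illustrative, and results depend on network access):

import requests

def verify_link(url, timeout=10, retries=3):
    # Up to `retries` attempts; any 2xx status counts as a valid link.
    for _ in range(retries):
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=True)
            if 200 <= response.status_code < 300:
                return True
        except requests.RequestException:
            continue  # timeout or connection error: try again
    return False

print(verify_link("https://example.com"))      # True when the host answers with 2xx
print(verify_link("https://example.invalid"))  # False: DNS failure exhausts all retries

Two side effects worth noting: a server that answers HEAD with 405 is reported as invalid even when the page exists, and each headline now costs up to three network round-trips during parsing.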
app (3).py (resulting file after this commit):

import streamlit as st
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
from dateutil import parser
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import datetime
import requests

st.set_page_config(page_title="Stock News Sentiment Analyzer", layout="wide")

def verify_link(url, timeout=10, retries=3):
    for _ in range(retries):
        try:
            response = requests.head(url, timeout=timeout, allow_redirects=True)
            if 200 <= response.status_code < 300:
                return True
        except requests.RequestException:
            continue
    return False

def get_news(ticker):
    finviz_url = 'https://finviz.com/quote.ashx?t='
    url = finviz_url + ticker
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    response = urlopen(req)
    html = BeautifulSoup(response, 'html.parser')
    news_table = html.find(id='news-table')
    return news_table

def parse_news(news_table):
    parsed_news = []

    for x in news_table.findAll('tr'):
        try:
            text = x.a.get_text()
            link = x.a['href']
            date_scrape = x.td.text.strip().split()

            if len(date_scrape) == 1:
                date = datetime.datetime.today().strftime('%Y-%m-%d')
                time = date_scrape[0]
            else:
                date = date_scrape[0]
                time = date_scrape[1]

            datetime_str = f"{date} {time}"
            datetime_parsed = parser.parse(datetime_str)

            is_valid = verify_link(link)

            parsed_news.append([datetime_parsed, text, link, is_valid])

        except Exception as e:
            print("Error parsing news:", e)
            continue

    columns = ['datetime', 'headline', 'link', 'is_valid']
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)

    return parsed_news_df

def score_news(parsed_news_df):
    vader = SentimentIntensityAnalyzer()

    scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()
    scores_df = pd.DataFrame(scores)
    parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
    parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
    parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})

    return parsed_and_scored_news

def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    mean_scores = numeric_cols.resample('h').mean()

    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score',
                 title=f'{ticker} Hourly Sentiment Scores',
                 color='sentiment_score',
                 color_continuous_scale=['red', 'yellow', 'green'],
                 range_color=[-1, 1])

    fig.update_layout(coloraxis_colorbar=dict(
        title="Sentiment",
        tickvals=[-1, 0, 1],
        ticktext=["Negative", "Neutral", "Positive"],
    ))

    return fig

def plot_daily_sentiment(parsed_and_scored_news, ticker):
    numeric_cols = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    mean_scores = numeric_cols.resample('D').mean()

    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score',
                 title=f'{ticker} Daily Sentiment Scores',
                 color='sentiment_score',
                 color_continuous_scale=['red', 'yellow', 'green'],
                 range_color=[-1, 1])

    fig.update_layout(coloraxis_colorbar=dict(
        title="Sentiment",
        tickvals=[-1, 0, 1],
        ticktext=["Negative", "Neutral", "Positive"],
    ))

    return fig

def get_recommendation(sentiment_scores):
    avg_sentiment = sentiment_scores['sentiment_score'].mean()

    if avg_sentiment >= 0.05:
        return f"Positive sentiment (Score: {avg_sentiment:.2f}). The recent news suggests a favorable outlook for this stock. Consider buying or holding if you already own it."
    elif avg_sentiment <= -0.05:
        return f"Negative sentiment (Score: {avg_sentiment:.2f}). The recent news suggests caution. Consider selling or avoiding this stock for now."
    else:
        return f"Neutral sentiment (Score: {avg_sentiment:.2f}). The recent news doesn't show a strong bias. Consider holding if you own the stock, or watch for more definitive trends before making a decision."

st.header("Stock News Sentiment Analyzer")

ticker = st.text_input('Enter Stock Ticker', '').upper()

try:
    st.subheader(f"Sentiment Analysis and Recommendation for {ticker} Stock")
    news_table = get_news(ticker)
    parsed_news_df = parse_news(news_table)
    parsed_and_scored_news = score_news(parsed_news_df)

    # Generate and display recommendation
    recommendation = get_recommendation(parsed_and_scored_news)
    st.write(recommendation)

    # Display a disclaimer
    st.warning("Disclaimer: This recommendation is based solely on recent news sentiment and should not be considered as financial advice. Always do your own research and consult with a qualified financial advisor before making investment decisions.")

    fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
    fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

    st.plotly_chart(fig_hourly)
    st.plotly_chart(fig_daily)

    description = f"""
        The above charts average the sentiment scores of {ticker} stock hourly and daily.
        The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
        The news headlines are obtained from the FinViz website.
        Sentiments are given by the nltk.sentiment.vader Python library.
        Links have been verified for validity.
    """

    st.write(description)

    parsed_and_scored_news['link'] = parsed_and_scored_news.apply(
        lambda row: f'<a href="{row["link"]}" target="_blank">{"Valid✅" if row["is_valid"] else "Invalid❌"} Link</a>',
        axis=1
    )

    st.write(parsed_and_scored_news.drop(columns=['is_valid']).to_html(escape=False), unsafe_allow_html=True)

except Exception as e:
    print(str(e))
    st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")

hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
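For context on what score_news() consumes: VADER's polarity_scores() returns a dict of neg/neu/pos proportions plus a normalized compound score in [-1, 1]; score_news() renames compound to sentiment_score, and get_recommendation() applies the conventional VADER cutoffs of ±0.05 to its mean. A minimal sketch with a made-up headline:

import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

vader = SentimentIntensityAnalyzer()
# Made-up headline; in the app the text comes from the FinViz news table.
scores = vader.polarity_scores("Company beats quarterly earnings estimates")
print(scores)  # {'neg': ..., 'neu': ..., 'pos': ..., 'compound': ...}

# Mirrors get_recommendation(): the standard VADER thresholds.
label = ("positive" if scores["compound"] >= 0.05
         else "negative" if scores["compound"] <= -0.05
         else "neutral")
print(label)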
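The plotting helpers depend on pandas time-series resampling, which is why score_news() sets the datetime column as the index: resample() requires a DatetimeIndex. A small self-contained sketch of the hourly case, with invented timestamps and scores:

import pandas as pd

# Invented data standing in for scored headlines.
df = pd.DataFrame(
    {"sentiment_score": [0.4, -0.2, 0.1]},
    index=pd.to_datetime(["2024-01-02 09:15", "2024-01-02 09:45", "2024-01-02 10:30"]),
)

# resample('h') buckets rows by hour; mean() averages each bucket.
print(df.resample("h").mean())
#                      sentiment_score
# 2024-01-02 09:00:00              0.1   <- (0.4 + -0.2) / 2
# 2024-01-02 10:00:00              0.1

Non-numeric columns such as headline and link are filtered out first via select_dtypes, since a mean is undefined for them.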