Spaces:
Sleeping
Sleeping
File size: 5,708 Bytes
3069ab1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import streamlit as st
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import plotly
import plotly.express as px
import json # for graph plotting in website
# NLTK VADER for sentiment analysis
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import subprocess
import os
import datetime
# Configure the browser tab title and switch the page to the wide layout.
# NOTE(review): Streamlit documents that this call should precede other
# Streamlit commands, so keep it ahead of the UI code further down.
st.set_page_config(
    page_title="Akshay's Stock News Sentiment Analyzer",
    layout="wide",
)
def get_news(ticker):
    """Download the FinViz quote page for *ticker* and return its news table.

    Args:
        ticker: Stock symbol; it is appended verbatim to the module-level
            ``finviz_url``.

    Returns:
        The element whose ``id`` is ``'news-table'``, or ``None`` when the
        page contains no such element.

    Raises:
        Any exception raised by ``urlopen`` (for example on network or HTTP
        errors) propagates to the caller.
    """
    url = finviz_url + ticker
    # A browser-like User-Agent is sent with the request.
    # NOTE(review): presumably FinViz rejects urllib's default agent - confirm.
    req = Request(
        url=url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'},
    )
    # Use the response as a context manager so the connection is released as
    # soon as the page has been parsed.
    with urlopen(req) as response:
        # Name the parser explicitly: otherwise BeautifulSoup picks whichever
        # parser happens to be installed and warns about the guess.
        html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the Soup and hand it back to the caller
    return html.find(id='news-table')
# parse news into dataframe
def parse_news(news_table):
    """Convert a scraped news table into a DataFrame of timestamped headlines.

    Args:
        news_table: Object exposing ``find_all('tr')``. For every row the
            headline is read from ``row.a.get_text()`` and the date/time text
            from ``row.td.text``.

    Returns:
        DataFrame with the columns ``date``, ``time``, ``headline`` and
        ``datetime``. A ``date`` equal to ``"Today"`` is replaced by today's
        date formatted as ``YYYY-MM-DD``; ``datetime`` is parsed from
        ``date + ' ' + time``.

    Rows are skipped when they have no link or no cell, when the cell text is
    empty, or when no date has been seen yet for a time-only row.
    """
    today_string = datetime.datetime.today().strftime('%Y-%m-%d')
    parsed_news = []
    # Rows that carry only a time reuse the date of the closest preceding row
    # that carried one; None means no date has been seen so far.
    date = None
    for row in news_table.find_all('tr'):
        link = row.a
        cell = row.td
        # Skip rows that are not headlines (no link or no date/time cell).
        if link is None or cell is None:
            continue
        # split text in the td tag into a list
        date_scrape = cell.text.split()
        if not date_scrape:
            continue
        # if the length of 'date_scrape' is 1, load 'time' as the only element
        if len(date_scrape) == 1:
            time = date_scrape[0]
        # else load 'date' as the 1st element and 'time' as the second
        else:
            date, time = date_scrape[0], date_scrape[1]
        # A time-only row before any dated row cannot be placed in time.
        if date is None:
            continue
        # Append date, time and headline as a list to the 'parsed_news' list
        parsed_news.append([date, time, link.get_text()])
    # Set column names
    columns = ['date', 'time', 'headline']
    # Convert the parsed_news list into a DataFrame
    parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
    parsed_news_df['date'] = parsed_news_df['date'].replace("Today", today_string)
    # Parse each string on its own: after the "Today" substitution the column
    # can mix two date layouts, which a single vectorised parse may reject.
    # The outer to_datetime keeps a datetime dtype even for an empty frame.
    stamps = parsed_news_df['date'] + ' ' + parsed_news_df['time']
    parsed_news_df['datetime'] = pd.to_datetime(stamps.map(pd.to_datetime))
    return parsed_news_df
def score_news(parsed_news_df):
    """Attach VADER polarity scores to every headline.

    Args:
        parsed_news_df: DataFrame with at least the columns ``headline``,
            ``date``, ``time`` and ``datetime``.

    Returns:
        DataFrame indexed by ``datetime`` that keeps ``headline`` and adds the
        score columns produced by ``polarity_scores``; the ``compound`` column
        is renamed to ``sentiment_score``. ``date`` and ``time`` are dropped.
    """
    # Instantiate the sentiment intensity analyzer
    vader = SentimentIntensityAnalyzer()
    # Score every headline; each result is a dict of polarity scores
    scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()
    # Build the scores frame on the same index as the news so the join below
    # lines rows up even when the input does not use a default RangeIndex
    scores_df = pd.DataFrame(scores, index=parsed_news_df.index)
    # Join the DataFrames of the news and the scores
    parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')
    parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')
    # Use the 'columns' keyword: the positional axis argument of drop() is
    # not accepted by current pandas releases
    parsed_and_scored_news = parsed_and_scored_news.drop(columns=['date', 'time'])
    parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})
    return parsed_and_scored_news
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per hour.

    Args:
        parsed_and_scored_news: DataFrame with a datetime index and a
            ``sentiment_score`` column.
        ticker: Stock symbol used as the prefix of the chart title.

    Returns:
        The Plotly figure; it is returned rather than shown so the caller can
        embed it in the page.
    """
    # Average only the numeric score columns: the text 'headline' column
    # cannot be averaged and makes mean() fail on current pandas releases
    numeric_scores = parsed_and_scored_news.select_dtypes(include='number')
    # Resample into hourly buckets ('h' is the non-deprecated hourly alias)
    mean_scores = numeric_scores.resample('h').mean()
    # Plot a bar chart with plotly
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Hourly Sentiment Scores')
    return fig
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per day.

    Args:
        parsed_and_scored_news: DataFrame with a datetime index and a
            ``sentiment_score`` column.
        ticker: Stock symbol used as the prefix of the chart title.

    Returns:
        The Plotly figure; it is returned rather than shown so the caller can
        embed it in the page.
    """
    # Average only the numeric score columns: the text 'headline' column
    # cannot be averaged and makes mean() fail on current pandas releases
    numeric_scores = parsed_and_scored_news.select_dtypes(include='number')
    # Resample into daily buckets and take the mean of each
    mean_scores = numeric_scores.resample('D').mean()
    # Plot a bar chart with plotly
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Daily Sentiment Scores')
    return fig
# for extracting data from finviz
finviz_url = 'https://finviz.com/quote.ashx?t='

# NOTE(review): this header says "Bohmian's" while the page title set at the
# top of the file says "Akshay's" - confirm which name is intended.
st.header("Bohmian's Stock News Sentiment Analyzer")
# Normalise the input: surrounding whitespace would otherwise end up in the URL
ticker = st.text_input('Enter Stock Ticker', '').strip().upper()

if not ticker:
    # Nothing entered yet: show the prompt without issuing a pointless request
    st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")
else:
    try:
        st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
        # Scrape, parse and score the headlines for the requested ticker
        news_table = get_news(ticker)
        parsed_news_df = parse_news(news_table)
        print(parsed_news_df)
        parsed_and_scored_news = score_news(parsed_news_df)
        fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
        fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)

        st.plotly_chart(fig_hourly)
        st.plotly_chart(fig_daily)

        description = """
The above chart averages the sentiment scores of {} stock hourly and daily.
The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
The news headlines are obtained from the FinViz website.
Sentiments are given by the nltk.sentiment.vader Python library.
""".format(ticker)
        st.write(description)
        st.table(parsed_and_scored_news)
    except Exception as e:
        # Top-level boundary of the app: report the failure on stdout and ask
        # the user for a valid ticker instead of showing a traceback
        print(str(e))
        st.write("Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter.")
# CSS injected into the page to hide the element with id "MainMenu" and the
# footer element.
hide_streamlit_style = (
    "\n"
    "<style>\n"
    "#MainMenu {visibility: hidden;}\n"
    "footer {visibility: hidden;}\n"
    "</style>\n"
)
# Raw HTML has to be explicitly allowed for the <style> tag to be emitted.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|