# Spaces: Sleeping
import streamlit as st | |
from urllib.request import urlopen, Request | |
from bs4 import BeautifulSoup | |
import pandas as pd | |
import plotly | |
import plotly.express as px | |
import json # for graph plotting in website | |
# NLTK VADER for sentiment analysis | |
from dateutil import parser | |
import nltk | |
nltk.downloader.download('vader_lexicon') | |
from nltk.sentiment.vader import SentimentIntensityAnalyzer | |
import subprocess | |
import os | |
import datetime | |
# Page-level Streamlit settings: browser-tab title and wide layout.
st.set_page_config(
    page_title="Stock News Sentiment Analyzer",
    layout="wide",
)
def get_news(ticker):
    """Download the FinViz quote page for ``ticker`` and return its news table.

    Args:
        ticker: Stock symbol; it is appended to the module-level
            ``finviz_url`` to build the request URL.

    Returns:
        The parsed element whose ``id`` is ``news-table``, or ``None`` when
        the page contains no such element.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails (``HTTPError`` is a subclass).
    """
    from urllib.parse import quote

    # Percent-encode the symbol so stray characters in user input (spaces,
    # '/', '?', '&') cannot alter the structure of the request URL.
    url = finviz_url + quote(ticker, safe='')
    # A browser-like User-Agent is sent explicitly
    # (presumably the site rejects urllib's default one -- TODO confirm).
    req = Request(url=url, headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'})
    # The context manager closes the HTTP response even if parsing fails;
    # BeautifulSoup consumes the stream inside its constructor, so the
    # connection is no longer needed once the soup has been built.
    with urlopen(req) as response:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the soup and hand it back to the caller.
    return html.find(id='news-table')
# Parse the scraped news table into a DataFrame.
def parse_news(news_table):
    """Turn the scraped news table into a DataFrame of timestamped headlines.

    Each ``<tr>`` of ``news_table`` is expected to hold the headline in its
    first ``<a>`` tag and a "date time" (or just "time") string in its first
    ``<td>`` tag. Rows that cannot be parsed are reported on stdout and
    skipped.

    Args:
        news_table: Parsed table element exposing ``find_all('tr')``.

    Returns:
        A DataFrame with the columns ``datetime`` and ``headline``, one row
        per successfully parsed table row (possibly empty).
    """
    today = datetime.datetime.today().strftime('%Y-%m-%d')
    # Date token of the most recent row that carried one. Rows that show only
    # a time reuse it instead of being stamped with today's date.
    # NOTE(review): assumes the table prints the date only on the first row
    # of each day -- confirm against the live page.
    last_date = None
    parsed_news = []
    for row in news_table.find_all('tr'):
        try:
            # Headline text lives in the row's first anchor.
            headline = row.a.get_text()
            # Date and time come from the first <td> tag.
            stamp = row.td.text.strip().split()
            if not stamp:
                raise ValueError("empty date/time cell")
            if len(stamp) == 1:
                # Only a time is present: keep the previously seen date.
                clock = stamp[0]
            else:
                last_date, clock = stamp[0], stamp[1]
            # Fall back to today's date when no dated row has been seen yet,
            # or when the date token is the literal word "Today"
            # (presumably used by the site for same-day news -- TODO confirm).
            if last_date is None or last_date.lower() == 'today':
                day = today
            else:
                day = last_date
            # dateutil copes with the assorted date spellings.
            parsed_news.append([parser.parse(f"{day} {clock}"), headline])
        except (AttributeError, IndexError, ValueError, OverflowError) as e:
            # AttributeError: the row has no <a>/<td>; ValueError and
            # OverflowError: dateutil could not interpret the timestamp.
            print("Error parsing news:", e)
            continue
    # Convert the collected rows to a DataFrame.
    return pd.DataFrame(parsed_news, columns=['datetime', 'headline'])
def score_news(parsed_news_df):
    """Attach VADER polarity scores to every headline.

    Args:
        parsed_news_df: DataFrame with at least the columns ``datetime`` and
            ``headline``.

    Returns:
        The input rows joined with the scores produced by
        ``polarity_scores``, indexed by ``datetime``, with the ``compound``
        column renamed to ``sentiment_score``.
    """
    analyzer = SentimentIntensityAnalyzer()
    # One dict of polarity scores per headline, in row order.
    polarity = [
        analyzer.polarity_scores(headline)
        for headline in parsed_news_df['headline']
    ]
    score_frame = pd.DataFrame(polarity)
    # Join on the shared positional index, then switch to a datetime index
    # and give the aggregate score a descriptive name.
    return (
        parsed_news_df
        .join(score_frame, rsuffix='_right')
        .set_index('datetime')
        .rename(columns={"compound": "sentiment_score"})
    )
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per hour.

    Args:
        parsed_and_scored_news: Scored headlines indexed by datetime,
            containing a ``sentiment_score`` column.
        ticker: Stock symbol used as the prefix of the chart title.

    Returns:
        The Plotly figure, ready to be displayed in the Streamlit app.
    """
    # Averaging only makes sense for numbers, so non-numeric columns
    # (such as the headline text) are left out before resampling.
    score_columns = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    # Bucket the rows into one-hour bins and average each bin.
    hourly_mean = score_columns.resample('h').mean()
    chart_title = ticker + ' Hourly Sentiment Scores'
    return px.bar(hourly_mean, x=hourly_mean.index, y='sentiment_score', title=chart_title)
def plot_daily_sentiment(parsed_and_scored_news, ticker):
    """Build a bar chart of the mean sentiment score per day.

    Args:
        parsed_and_scored_news: Scored headlines indexed by datetime,
            containing a ``sentiment_score`` column.
        ticker: Stock symbol used as the prefix of the chart title.

    Returns:
        The Plotly figure, ready to be displayed in the Streamlit app.
    """
    # Averaging only makes sense for numbers, so non-numeric columns
    # (such as the headline text) are left out before resampling.
    score_columns = parsed_and_scored_news.select_dtypes(include=['float64', 'int64'])
    # Bucket the rows into one-day bins and average each bin.
    daily_mean = score_columns.resample('D').mean()
    chart_title = ticker + ' Daily Sentiment Scores'
    return px.bar(daily_mean, x=daily_mean.index, y='sentiment_score', title=chart_title)
# Base URL of the FinViz quote page; get_news() appends the ticker to it.
finviz_url = 'https://finviz.com/quote.ashx?t='

# Explanatory text shown under the charts. Kept at column 0 so that code
# indentation never leaks into the rendered markdown.
DESCRIPTION_TEMPLATE = """
The above chart averages the sentiment scores of {} stock hourly and daily.
The table below gives each of the most recent headlines of the stock and the negative, neutral, positive and an aggregated sentiment score.
The news headlines are obtained from the FinViz website.
Sentiments are given by the nltk.sentiment.vader Python library.
"""
TICKER_PROMPT = "Enter a correct stock ticker, e.g. 'AAPL' above and hit Enter."

st.header("Stock News Sentiment Analyzer")
# Normalise the input: surrounding whitespace is never part of a symbol.
ticker = st.text_input('Enter Stock Ticker', '').strip().upper()
# NOTE(review): `df` is not referenced anywhere else in the visible file;
# kept in case code outside this view relies on it.
df = pd.DataFrame({'datetime': datetime.datetime.now(), 'ticker': ticker}, index = [0])

if not ticker:
    # Nothing entered yet (e.g. first page load): prompt the user instead of
    # sending a request with an empty symbol.
    st.write(TICKER_PROMPT)
else:
    try:
        st.subheader("Hourly and Daily Sentiment of {} Stock".format(ticker))
        news_table = get_news(ticker)
        parsed_news_df = parse_news(news_table)
        print(parsed_news_df)
        parsed_and_scored_news = score_news(parsed_news_df)
        fig_hourly = plot_hourly_sentiment(parsed_and_scored_news, ticker)
        fig_daily = plot_daily_sentiment(parsed_and_scored_news, ticker)
        st.plotly_chart(fig_hourly)
        st.plotly_chart(fig_daily)
        description = DESCRIPTION_TEMPLATE.format(ticker)
        st.write(description)
        st.table(parsed_and_scored_news)
    except Exception as e:
        # Top-level boundary of the app: any failure in fetching, parsing,
        # scoring or plotting is logged to stdout and shown as a prompt
        # rather than a traceback.
        print(str(e))
        st.write(TICKER_PROMPT)

# CSS that hides Streamlit's main menu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)