import re
import plotly.express as px
import datetime
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import datetime
def clean_text(text):
    """Return *text* with every upper-case ``LABEL:`` prefix removed.

    A label is any run of capital letters and spaces that is immediately
    followed by a colon (for example ``"OPEC:"`` or ``"BREAKING NEWS:"``).
    Each occurrence is deleted wherever it appears in the string; the
    characters around it, including any space after the colon, are kept.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # Kept as a collection so further patterns can be appended later.
    label_patterns = ('[A-Z ]+:',)

    cleaned = text
    for pattern in label_patterns:
        cleaned = re.compile(pattern).sub('', cleaned)
    return cleaned
def _localized_bound(value, tz):
    """Turn one date-range bound into a ``pd.Timestamp`` comparable with the data.

    A naive bound (a ``date``, a naive ``datetime`` or a string without an
    offset) is interpreted in *tz* when *tz* is not ``None``; a bound that
    already carries a timezone is returned unchanged.
    """
    bound = pd.Timestamp(value)
    if tz is not None and bound.tzinfo is None:
        bound = bound.tz_localize(tz)
    return bound


def prepare_df(df, categories, date_filter):
    """Select the news rows to display for the chosen topics and date range.

    Processing steps:

    1. Drop the ``'Unnamed: 0'`` column from *df* if it is present.
    2. Keep only rows whose ``topic_verification`` is in *categories*
       (all rows are kept when *categories* is empty or ``None``).
    3. Remove rows whose headline contains yesterday's upper-cased month
       abbreviation (for example ``"JAN"``).
    4. Move the ``headline`` column to the first position.
    5. Parse ``updatedDate`` and keep rows inside the inclusive
       ``date_filter`` range.

    Steps 3-5 are best-effort: if one of them fails the error is printed and
    the rows produced by the steps that did succeed are returned.

    Args:
        df: News data with at least the ``topic_verification``, ``headline``
            and ``updatedDate`` columns. Apart from the removal of
            ``'Unnamed: 0'`` (done in place, as before) it is not modified.
        categories: Collection of topic names to keep; empty/``None`` means
            "no topic filtering".
        date_filter: Two-item sequence ``(start, end)``. Each item may be a
            ``datetime.date``, a ``datetime.datetime`` or anything else
            ``pd.Timestamp`` accepts. Naive bounds are interpreted in the
            timezone of the parsed ``updatedDate`` column.

    Returns:
        The filtered ``DataFrame``.
    """
    # Existing in-place side effect, kept for callers; ``errors='ignore'``
    # replaces the bare ``except`` that used to hide every kind of failure.
    df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

    # ``len`` instead of truthiness so array-like selections work as well.
    if categories is not None and len(categories) > 0:
        news_data = df[df['topic_verification'].isin(categories)]
    else:
        news_data = df

    try:
        # The month name comes from ``strftime('%b')`` and is therefore
        # locale dependent; it is matched as a literal, not as a regex.
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        month_tag = yesterday.strftime('%b').upper()
        # ``na=False`` keeps rows with a missing headline instead of making
        # the boolean negation below fail.
        has_month_tag = news_data['headline'].str.contains(
            month_tag, regex=False, na=False
        )
        news_data = news_data[~has_month_tag]

        # Put 'headline' first by re-selecting columns rather than mutating
        # the frame, so the caller's data is never touched.
        ordered = ['headline'] + [
            column for column in news_data.columns if column != 'headline'
        ]
        news_data = news_data[ordered]

        news_data = news_data.assign(
            updatedDate=pd.to_datetime(
                news_data['updatedDate'], format='%Y-%m-%d %H:%M:%S%z'
            )
        )

        # Only timezone-aware datetime dtypes expose ``tz``.
        column_tz = getattr(news_data['updatedDate'].dtype, 'tz', None)
        start = _localized_bound(date_filter[0], column_tz)
        end = _localized_bound(date_filter[1], column_tz)
        in_range = (news_data['updatedDate'] >= start) & (
            news_data['updatedDate'] <= end
        )
        news_data = news_data[in_range]
    except Exception as exc:
        # Deliberate best-effort: report the problem and return what we have.
        print(exc)
    return news_data
def plot_3dgraph(news_data):
    """Build a 3-D scatter plot of the sentiment scores of each news item.

    The axes are ``neutral_score`` (x), ``negative_score`` (y) and
    ``positive_score`` (z). Points are coloured by ``positive_score`` on the
    ``RdBu`` scale and sized by ``negative_score``; hovering a point shows
    its ``headline`` and ``topic_verification``.

    The last row of *news_data* is drawn a second time as a separate marker
    labelled "MEAN OF SELECTED NEWS".
    NOTE(review): this presumes the caller has appended a row holding the
    mean scores as the final row - confirm against the caller.

    Args:
        news_data: ``DataFrame`` with the ``neutral_score``,
            ``negative_score``, ``positive_score``, ``headline`` and
            ``topic_verification`` columns.

    Returns:
        The Plotly figure.
    """
    fig = px.scatter_3d(
        news_data,
        x='neutral_score',
        y='negative_score',
        z='positive_score',
        color='positive_score',
        hover_name='headline',
        color_continuous_scale='RdBu',
        size_max=40,
        size='negative_score',
        # A list is the documented form for ``hover_data``.
        hover_data=['topic_verification'],
    )
    fig.update_layout(
        height=600,
        title=dict(
            # Plotly renders "<br>" as a line break inside titles; the raw
            # newline that used to split this literal was a syntax error.
            text=(
                "News Semantics towards Crude Oil Price<br>"
                "Hover cursor on a datapoint to show news title"
            ),
            font=dict(size=35),
            automargin=False,
        ),
    )
    # Applied before the highlight trace is added, so it only affects the
    # traces created by Plotly Express.
    fig.update_traces(textfont_size=8)

    # Nothing to highlight when there are no rows (``iloc[[-1]]`` would raise).
    if len(news_data) > 0:
        last_row = news_data.iloc[[-1]]
        fig.add_trace(
            dict(
                type='scatter3d',
                x=last_row['neutral_score'],
                y=last_row['negative_score'],
                z=last_row['positive_score'],
                mode='markers',
                name='MEAN OF SELECTED NEWS',
                marker=dict(color='rgb(0, 250, 200)', size=10),
            )
        )
    return fig