# Spaces:
# Sleeping
# Sleeping
import datetime
import re

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
def clean_text(text):
    """Strip upper-case ``LABEL:`` style fragments from *text*.

    Every run of upper-case ASCII letters and/or spaces that is immediately
    followed by a colon is removed together with the colon, wherever it
    occurs in the string (not only at the start).

    Args:
        text: The string to clean.

    Returns:
        A new string with all matching fragments removed. The input is
        returned unchanged in content when nothing matches.
    """
    # Kept as a tuple so further patterns can be appended; they are applied
    # one after another, each on the result of the previous substitution.
    removal_patterns = (r'[A-Z ]+:',)

    cleaned = text
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned
def prepare_df(df, categories, date_filter):
    """Filter the news table by topic, headline month tag and date window.

    Steps, in order:
      1. Drop a leftover ``Unnamed: 0`` column from ``df`` (in place) when
         it is present.
      2. Keep only rows whose ``topic_verification`` is in ``categories``;
         an empty/falsy ``categories`` keeps every row.
      3. Remove rows whose ``headline`` contains the upper-cased abbreviated
         month name (``strftime('%b')``) of yesterday's date.
      4. Move ``headline`` to the first column.
      5. Parse ``updatedDate`` with the format ``%Y-%m-%d %H:%M:%S%z`` and
         keep rows inside the inclusive ``date_filter`` window.

    Steps 3-5 are best-effort: if any of them raises, the error is printed
    and the frame is returned as processed up to the failing step.

    Args:
        df: DataFrame with the columns ``topic_verification``, ``headline``
            and ``updatedDate``. Apart from the ``Unnamed: 0`` drop it is
            not modified; all further work happens on copies.
        categories: Collection of ``topic_verification`` values to keep.
        date_filter: Sequence whose first two items are the lower and upper
            bound of the window. Each item must offer ``strftime`` (for
            example ``datetime.date`` or ``datetime.datetime``).

    Returns:
        The filtered DataFrame.
    """
    timestamp_format = '%Y-%m-%d %H:%M:%S%z'

    # errors='ignore' makes the drop a no-op when the column is absent,
    # without hiding unrelated failures behind a bare except.
    df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

    # Only filter by topic when categories were actually selected;
    # isin([]) would otherwise discard every row.
    if categories:
        news_data = df[df['topic_verification'].isin(categories)].copy()
    else:
        news_data = df.copy()

    try:
        yesterday = datetime.date.today() - datetime.timedelta(days=1)
        month_tag = yesterday.strftime('%b').upper()
        # na=False keeps rows with a missing headline instead of producing
        # NaN in the mask, which cannot be negated with ~.
        has_month_tag = news_data['headline'].str.contains(
            month_tag, regex=False, na=False
        )
        news_data = news_data[~has_month_tag].copy()

        # Move 'headline' to the first position.
        news_data.insert(0, 'headline', news_data.pop('headline'))

        news_data['updatedDate'] = pd.to_datetime(
            news_data['updatedDate'], format=timestamp_format
        )

        # The bounds are rendered as strings; pandas parses them when they
        # are compared against the datetime column.
        lower_bound = date_filter[0].strftime(timestamp_format)
        upper_bound = date_filter[1].strftime(timestamp_format)
        in_window = (
            (news_data['updatedDate'] >= lower_bound)
            & (news_data['updatedDate'] <= upper_bound)
        )
        news_data = news_data[in_window]
    except Exception as exc:
        # Deliberately best-effort: report the problem and return whatever
        # has been filtered so far rather than failing the caller.
        print(exc)
    return news_data
def plot_3dgraph(news_data):
    """Build a 3-D scatter plot of the sentiment scores in *news_data*.

    Each row is drawn at (``neutral_score``, ``negative_score``,
    ``positive_score``), coloured by ``positive_score`` on the ``RdBu``
    scale and sized by ``negative_score``. Hovering a point shows its
    ``headline`` and ``topic_verification``.

    The last row of ``news_data`` is additionally drawn as a separate
    marker trace labelled ``MEAN OF SELECTED NEWS``.
    NOTE(review): presumably the caller appends the mean of the selection
    as the final row; nothing in this function computes a mean - confirm.

    Args:
        news_data: DataFrame with the columns ``neutral_score``,
            ``negative_score``, ``positive_score``, ``headline`` and
            ``topic_verification``.

    Returns:
        The plotly figure.
    """
    fig = px.scatter_3d(
        news_data,
        x='neutral_score',
        y='negative_score',
        z='positive_score',
        color='positive_score',
        hover_name='headline',
        # Passed as a list, the documented multi-column form of hover_data.
        hover_data=['topic_verification'],
        color_continuous_scale='RdBu',
        size='negative_score',
        size_max=40,
    )
    fig.update_layout(
        height=600,
        title=dict(
            text="News Semantics towards Crude Oil Price <br><sup>Hover cursor on a datapoint to show news title</sup>",
            font=dict(size=35),
            automargin=False,
        ),
    )
    fig.update_traces(textfont_size=8)

    # iloc[[-1]] raises on an empty frame, so only add the highlight marker
    # when there is at least one row to point at.
    if len(news_data) > 0:
        last_row = news_data.iloc[[-1]]
        highlight = dict(
            type='scatter3d',
            x=last_row['neutral_score'],
            y=last_row['negative_score'],
            z=last_row['positive_score'],
            mode='markers',
            name='MEAN OF SELECTED NEWS',
            # A single point needs a single colour.
            marker=dict(color='rgb(0, 250, 200)', size=10),
        )
        fig.add_trace(highlight)
    return fig