trend_prediction_app / modules /data_preparation.py
ryanrahmadifa
App test
9321404
raw
history blame
1.91 kB
import re
import datetime
import plotly.express as px
import datetime
def clean_text(text):
    """Return ``text`` with every ``[A-Z ]+:`` match removed.

    A match is a run of one or more uppercase ASCII letters and/or spaces
    that is immediately followed by a colon; the colon is removed together
    with the run. Text that contains no such run is returned unchanged.
    """
    label_pattern = re.compile('[A-Z ]+:')
    return label_pattern.sub('', text)
def prepare_df(df, category):
    """Select the news rows for one product category and drop dated headlines.

    Args:
        df: pandas DataFrame with at least the ``topic_verification`` and
            ``headline`` columns. If an ``Unnamed: 0`` column is present it
            is removed from ``df`` in place (the caller's frame is modified).
        category: One of ``'Crude Oil'``, ``'Light Ends'``,
            ``'Middle Distillates'`` or ``'Heavy Distillates'``.
            ``'Crude Oil'`` also includes rows whose topic is
            ``'Macroeconomic & Geopolitics'``.

    Returns:
        The rows of ``df`` whose ``topic_verification`` belongs to
        ``category`` and whose ``headline`` does not contain the upper-cased
        abbreviated month name (``strftime('%b')``) of yesterday's date.
        Rows with a missing headline are kept.

    Raises:
        ValueError: If ``category`` is not one of the supported names.
    """
    topics_by_category = {
        'Crude Oil': ('Crude Oil', 'Macroeconomic & Geopolitics'),
        'Light Ends': ('Light Ends',),
        'Middle Distillates': ('Middle Distillates',),
        'Heavy Distillates': ('Heavy Distillates',),
    }
    if category not in topics_by_category:
        # Previously an unknown category fell through every branch and
        # failed later with an unrelated UnboundLocalError.
        raise ValueError(
            f"Unsupported category {category!r}; "
            f"expected one of {sorted(topics_by_category)}"
        )

    # Tolerate frames that lack the column without swallowing every other
    # error the way a bare ``except`` would.
    df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

    news_data = df[df['topic_verification'].isin(topics_by_category[category])]

    # NOTE: '%b' is the locale's abbreviated month name, so the tag depends
    # on the process locale (e.g. 'JAN' under an English/C locale).
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    month_tag = yesterday.strftime('%b').upper()

    # regex=False: the tag is a literal, not a pattern.
    # na=False: a missing headline cannot contain the tag; without it the
    # mask holds NaN and cannot be negated or used for boolean indexing.
    has_month_tag = news_data['headline'].str.contains(
        month_tag, regex=False, na=False
    )
    return news_data[~has_month_tag]
def plot_3dgraph(news_data):
    """Build a 3-D scatter figure of the sentiment scores in ``news_data``.

    ``news_data`` is passed straight to ``px.scatter_3d`` and must provide
    the columns referenced below: ``neutral_score``, ``negative_score``,
    ``positive_score``, ``headline`` and ``topic_verification``.

    Returns the figure after its layout (height and dated title) and trace
    text size have been set.
    """
    scatter_options = dict(
        x='neutral_score',
        y='negative_score',
        z='positive_score',
        color='positive_score',
        hover_name='headline',
        color_continuous_scale='RdBu',
        size_max=40,
        size='negative_score',
        text='headline',
        hover_data='topic_verification',
    )
    figure = px.scatter_3d(news_data, **scatter_options)

    # The title carries the current local date plus a small usage hint.
    today_label = datetime.datetime.now().strftime('%d/%m/%y')
    title_text = (
        f"Platts News Sentiments ({today_label})"
        "<br><sup>Hover cursor on a datapoint to show news title</sup>"
    )
    title_options = dict(text=title_text, font=dict(size=35), automargin=False)

    figure.update_layout(height=800, title=title_options)
    figure.update_traces(textfont_size=8)
    return figure