# trend_prediction_app/modules/data_preparation.py
# ryanrahmadifa
# Added more features.
# ed53591
# raw
# history blame
# 2.69 kB
import re
import plotly.express as px
import datetime
import plotly.graph_objects as go
import numpy as np
def clean_text(text):
    """Strip upper-case "LABEL:" style prefixes from *text*.

    Every run of capital letters and/or spaces that is immediately
    followed by a colon (for example ``"REUTERS:"`` or ``"NEW YORK:"``)
    is removed, wherever it occurs in the string.

    Args:
        text: The string to clean. Non-string values (``None``, NaN, ...)
            are returned unchanged so the function can be applied to
            columns that contain missing entries.

    Returns:
        The cleaned string, or *text* itself when it is not a string.
    """
    # Missing values would make re.sub raise TypeError; pass them through.
    if not isinstance(text, str):
        return text

    cleaned = text
    # Kept as a sequence so further patterns can be appended later.
    for pattern in (r'[A-Z ]+:',):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned
def prepare_df(df, categories, date_filter):
    """Filter and reshape the news DataFrame.

    Processing steps, in order:

    1. Drop the ``'Unnamed: 0'`` column from *df* in place, if present.
    2. Keep rows whose ``topic_verification`` is one of *categories*.
    3. Discard rows whose ``headline`` contains the upper-cased
       abbreviated month name (``strftime('%b')``) of yesterday's date.
    4. Move the ``headline`` column to the first position.
    5. Parse ``updatedDate`` strings using ``'%y/%m/%d %H:%M:%S'``.
    6. Keep rows with ``date_filter[0] <= updatedDate <= date_filter[1]``.

    Args:
        df: Source DataFrame. Only the ``'Unnamed: 0'`` drop mutates it;
            all other work is done on copies.
        categories: Values of ``topic_verification`` to keep.
        date_filter: Two-item sequence ``(start, end)`` of inclusive
            bounds; must be comparable with the parsed datetimes.

    Returns:
        The filtered DataFrame. If a step fails, the error is printed and
        whatever had been produced up to that point is returned (an empty
        frame with *df*'s columns if nothing had been produced yet).
    """
    # errors='ignore' makes the drop a no-op when the column is absent,
    # without hiding unrelated failures the way a bare `except` would.
    df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

    # Defined before the try so the final return can never hit an
    # unbound local when the very first step raises.
    news_data = df.iloc[0:0]
    try:
        # .copy() so later column edits act on an independent frame
        # instead of a slice of `df`.
        news_data = df[df['topic_verification'].isin(categories)].copy()

        actual_day = datetime.date.today() - datetime.timedelta(days=1)
        pattern_del = actual_day.strftime('%b').upper()
        # na=False: rows with a missing headline count as "no match", so
        # the mask stays boolean and can be inverted.
        has_month_tag = news_data['headline'].str.contains(pattern_del, na=False)
        news_data = news_data[~has_month_tag].copy()

        # Move 'headline' to the first column.
        first_column = news_data.pop('headline')
        news_data.insert(0, 'headline', first_column)

        news_data['updatedDate'] = news_data['updatedDate'].apply(
            lambda x: datetime.datetime.strptime(x, '%y/%m/%d %H:%M:%S')
        )
        in_range = (
            (news_data['updatedDate'] >= date_filter[0])
            & (news_data['updatedDate'] <= date_filter[1])
        )
        news_data = news_data[in_range]
    except Exception as E:
        # Best-effort by design: report and return what we have so far.
        print(E)
    return news_data
def plot_3dgraph(news_data):
    """Build a 3-D scatter figure of the sentiment scores in *news_data*.

    Each row is plotted at (neutral_score, negative_score, positive_score),
    coloured by ``positive_score`` and sized by ``negative_score``; hovering
    shows the ``headline`` and ``topic_verification`` values. The last row
    of *news_data* is drawn again as a separate marker trace labelled
    'MEAN OF SELECTED NEWS'.

    Args:
        news_data: DataFrame with the columns named above.
            NOTE(review): the last row is presumably a mean row appended
            by the caller -- confirm.

    Returns:
        The Plotly figure.
    """
    scatter_options = {
        'x': 'neutral_score',
        'y': 'negative_score',
        'z': 'positive_score',
        'color': 'positive_score',
        'hover_name': 'headline',
        'color_continuous_scale': 'RdBu',
        'size_max': 40,
        'size': 'negative_score',
        'hover_data': 'topic_verification',
    }
    fig = px.scatter_3d(news_data, **scatter_options)

    today_label = datetime.datetime.now().strftime('%d/%m/%y')
    title_text = (
        f"News Sentiments ({today_label})"
        "<br><sup>Hover cursor on a datapoint to show news title</sup>"
    )
    fig.update_layout(
        height=600,
        title={
            'text': title_text,
            'font': {'size': 35},
            'automargin': False,
        },
    )
    fig.update_traces(textfont_size=8)

    # Single-row frame holding the final record of the data.
    last_row = news_data.iloc[[-1]]
    highlight = {
        'type': 'scatter3d',
        'x': last_row['neutral_score'],
        'y': last_row['negative_score'],
        'z': last_row['positive_score'],
        'mode': 'markers',
        'name': 'MEAN OF SELECTED NEWS',
        'marker': {
            'color': ['rgb(0, 250, 200)'] * 25,
            'size': 10,
        },
    }
    fig.add_trace(highlight)
    return fig