Spaces:
Runtime error
Runtime error
ryanrahmadifa
committed on
Commit
·
5a057ad
1
Parent(s):
ebb4fa0
Added more features.
Browse files- app.py +12 -5
- modules/__pycache__/data_preparation.cpython-39.pyc +0 -0
- modules/data_preparation.py +50 -14
app.py
CHANGED
@@ -9,15 +9,19 @@ st.title('Sentiment Analysis for Price Trend Prediction')
|
|
9 |
st.header(f'Data based on Platts News and Insights Data')
|
10 |
st.subheader(f'{datetime.datetime.now()}')
|
11 |
|
12 |
-
news_category = st.selectbox("Select Market Movers Category", ("Crude Oil", "Light Ends", "Middle Distillates", "Heavy Distillates"))
|
13 |
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
16 |
|
17 |
df_news = pd.concat([latest_news, top_news], ignore_index=True).drop_duplicates(['headline'])
|
18 |
|
19 |
df_mean = pd.DataFrame({
|
20 |
-
'headline' : ['MEAN OF
|
21 |
'negative_score' : [df_news['negative_score'].mean()],
|
22 |
'neutral_score' : [df_news['neutral_score'].mean()],
|
23 |
'positive_score' : [df_news['positive_score'].mean()],
|
@@ -30,4 +34,7 @@ df_news_final.index = np.arange(1, len(df_news_final) + 1)
|
|
30 |
|
31 |
df_news_final
|
32 |
|
33 |
-
|
|
|
|
|
|
|
|
9 |
st.header(f'Data based on Platts News and Insights Data')
|
10 |
st.subheader(f'{datetime.datetime.now()}')
|
11 |
|
12 |
+
# news_category = st.selectbox("Select Market Movers Category", ("Crude Oil", "Light Ends", "Middle Distillates", "Heavy Distillates"))
|
13 |
|
14 |
+
news_categories = st.multiselect("Select desired Market Movers categories",
|
15 |
+
["Macroeconomic & Geopolitics", "Crude Oil", "Light Ends", "Middle Distillates", "Heavy Distillates", "Other"],
|
16 |
+
["Macroeconomic & Geopolitics", "Crude Oil"])
|
17 |
+
|
18 |
+
latest_news = prepare_df(pd.read_csv('data/results_platts_09082024_clean.csv'), news_categories)
|
19 |
+
top_news = prepare_df(pd.read_csv('data/topresults_platts_09082024_clean.csv'), news_categories)
|
20 |
|
21 |
df_news = pd.concat([latest_news, top_news], ignore_index=True).drop_duplicates(['headline'])
|
22 |
|
23 |
df_mean = pd.DataFrame({
|
24 |
+
'headline' : ['MEAN OF SELECTED NEWS'],
|
25 |
'negative_score' : [df_news['negative_score'].mean()],
|
26 |
'neutral_score' : [df_news['neutral_score'].mean()],
|
27 |
'positive_score' : [df_news['positive_score'].mean()],
|
|
|
34 |
|
35 |
df_news_final
|
36 |
|
37 |
+
try:
|
38 |
+
st.plotly_chart(plot_3dgraph(df_news_final), use_container_width=True)
|
39 |
+
except:
|
40 |
+
st.subheader('Select news categories to plot 3D graph')
|
modules/__pycache__/data_preparation.cpython-39.pyc
CHANGED
Binary files a/modules/__pycache__/data_preparation.cpython-39.pyc and b/modules/__pycache__/data_preparation.cpython-39.pyc differ
|
|
modules/data_preparation.py
CHANGED
@@ -2,6 +2,8 @@ import re
|
|
2 |
import datetime
|
3 |
import plotly.express as px
|
4 |
import datetime
|
|
|
|
|
5 |
|
6 |
def clean_text(text):
|
7 |
new_text = text
|
@@ -9,29 +11,51 @@ def clean_text(text):
|
|
9 |
new_text = re.sub(rgx_match, '', new_text)
|
10 |
return new_text
|
11 |
|
12 |
-
def prepare_df(df,
|
13 |
try:
|
14 |
df.drop(columns=['Unnamed: 0'], inplace=True)
|
15 |
except:
|
16 |
pass
|
17 |
|
18 |
-
|
19 |
-
news_data = df[(df['topic_verification'] == 'Crude Oil') | (df['topic_verification'] == 'Macroeconomic & Geopolitics')]
|
20 |
-
if category == 'Light Ends':
|
21 |
-
news_data = df[(df['topic_verification'] == 'Light Ends')]
|
22 |
-
if category == 'Middle Distillates':
|
23 |
-
news_data = df[(df['topic_verification'] == 'Middle Distillates')]
|
24 |
-
if category == 'Heavy Distillates':
|
25 |
-
news_data = df[(df['topic_verification'] == 'Heavy Distillates')]
|
26 |
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
29 |
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
32 |
|
33 |
return news_data
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
def plot_3dgraph(news_data):
|
36 |
fig = px.scatter_3d(news_data,
|
37 |
x='neutral_score',
|
@@ -46,10 +70,22 @@ def plot_3dgraph(news_data):
|
|
46 |
hover_data='topic_verification')
|
47 |
|
48 |
fig.update_layout(
|
49 |
-
height=
|
50 |
title=dict(text=f"Platts News Sentiments ({datetime.datetime.now().strftime('%d/%m/%y')})<br><sup>Hover cursor on a datapoint to show news title</sup>", font=dict(size=35), automargin=False)
|
51 |
)
|
52 |
|
53 |
fig.update_traces(textfont_size=8)
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
return fig
|
|
|
2 |
import datetime
|
3 |
import plotly.express as px
|
4 |
import datetime
|
5 |
+
import plotly.graph_objects as go
|
6 |
+
import numpy as np
|
7 |
|
8 |
def clean_text(text):
|
9 |
new_text = text
|
|
|
11 |
new_text = re.sub(rgx_match, '', new_text)
|
12 |
return new_text
|
13 |
|
14 |
+
def prepare_df(df, categories):
    """Return the rows of *df* that belong to the selected news categories.

    Processing steps, in order:

    1. Drop the ``'Unnamed: 0'`` column when it is present.
    2. Relabel every row whose ``headline`` or ``body`` contains the text
       "crude" (case-insensitive) so that its ``topic_verification`` becomes
       ``'Crude Oil'``.
    3. Keep only rows whose ``topic_verification`` is in *categories*.
    4. Discard rows whose ``headline`` contains the upper-cased abbreviated
       month name of yesterday's date (e.g. ``'AUG'``); the match is
       case-sensitive.

    Steps 1 and 2 modify *df* in place; the returned frame is a filtered
    selection of it.

    Args:
        df: DataFrame with ``headline``, ``body`` and ``topic_verification``
            columns.
        categories: list-like of ``topic_verification`` values to keep. An
            empty selection yields an empty frame.

    Returns:
        The filtered DataFrame.

    Raises:
        KeyError: if a required column is missing from *df*.
    """
    # errors='ignore' replaces the former bare ``try/except: pass`` without
    # hiding unrelated failures.
    df.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')

    # na=False keeps the mask strictly boolean when headline/body has
    # missing values; a NaN in the mask would make the indexing fail.
    mentions_crude = (
        df['headline'].str.contains('crude', case=False, na=False, regex=False)
        | df['body'].str.contains('crude', case=False, na=False, regex=False)
    )
    # Single .loc assignment instead of chained indexing, which may write to
    # a temporary copy and is a no-op under pandas copy-on-write.
    df.loc[mentions_crude, 'topic_verification'] = 'Crude Oil'

    news_data = df[df['topic_verification'].isin(categories)]

    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    month_tag = yesterday.strftime('%b').upper()

    # Missing headlines cannot contain the month tag, so they are kept.
    has_month_tag = news_data['headline'].str.contains(
        month_tag, na=False, regex=False
    )
    return news_data[~has_month_tag]
|
35 |
|
36 |
+
# def prepare_df(df, category):
|
37 |
+
# try:
|
38 |
+
# df.drop(columns=['Unnamed: 0'], inplace=True)
|
39 |
+
# except:
|
40 |
+
# pass
|
41 |
+
|
42 |
+
# if category == 'Crude Oil':
|
43 |
+
# news_data = df[(df['topic_verification'] == 'Crude Oil') | (df['topic_verification'] == 'Macroeconomic & Geopolitics')]
|
44 |
+
# if category == 'Light Ends':
|
45 |
+
# news_data = df[(df['topic_verification'] == 'Light Ends')]
|
46 |
+
# if category == 'Middle Distillates':
|
47 |
+
# news_data = df[(df['topic_verification'] == 'Middle Distillates')]
|
48 |
+
# if category == 'Heavy Distillates':
|
49 |
+
# news_data = df[(df['topic_verification'] == 'Heavy Distillates')]
|
50 |
+
|
51 |
+
# actual_day = datetime.date.today() - datetime.timedelta(days=1)
|
52 |
+
# pattern_del = actual_day.strftime('%b').upper()
|
53 |
+
|
54 |
+
# filter = news_data['headline'].str.contains(pattern_del)
|
55 |
+
# news_data = news_data[~filter]
|
56 |
+
|
57 |
+
# return news_data
|
58 |
+
|
59 |
def plot_3dgraph(news_data):
|
60 |
fig = px.scatter_3d(news_data,
|
61 |
x='neutral_score',
|
|
|
70 |
hover_data='topic_verification')
|
71 |
|
72 |
fig.update_layout(
|
73 |
+
height=600,
|
74 |
title=dict(text=f"Platts News Sentiments ({datetime.datetime.now().strftime('%d/%m/%y')})<br><sup>Hover cursor on a datapoint to show news title</sup>", font=dict(size=35), automargin=False)
|
75 |
)
|
76 |
|
77 |
fig.update_traces(textfont_size=8)
|
78 |
|
79 |
+
trace=dict(type='scatter3d',
|
80 |
+
x=news_data.iloc[[-1]]['neutral_score'],
|
81 |
+
y=news_data.iloc[[-1]]['negative_score'],
|
82 |
+
z=news_data.iloc[[-1]]['positive_score'],
|
83 |
+
mode='markers',
|
84 |
+
name= 'MEAN OF SELECTED NEWS',
|
85 |
+
marker=dict(color=[f'rgb({0}, {250}, {200})' for _ in range(25)],
|
86 |
+
size=10)
|
87 |
+
)
|
88 |
+
|
89 |
+
fig.add_trace(trace)
|
90 |
+
|
91 |
return fig
|