ryanrahmadifa committed on
Commit
41c16db
·
1 Parent(s): 24f5b17

Added more features.

Browse files
app.py CHANGED
@@ -2,23 +2,30 @@ import streamlit as st
2
  import pandas as pd
3
  from modules.data_preparation import prepare_df, plot_3dgraph
4
  import numpy as np
5
- import datetime
6
 
7
  st.title('Sentiment Analysis for Price Trend Prediction')
8
 
9
  st.header(f'Data based on Platts News and Insights Data')
10
- st.subheader(f'{datetime.datetime.now()}')
11
-
12
- # news_category = st.selectbox("Select Market Movers Category", ("Crude Oil", "Light Ends", "Middle Distillates", "Heavy Distillates"))
13
 
14
  news_categories = st.multiselect("Select desired Market Movers categories",
15
  ["Macroeconomic & Geopolitics", "Crude Oil", "Light Ends", "Middle Distillates", "Heavy Distillates", "Other"],
16
  ["Macroeconomic & Geopolitics", "Crude Oil"])
17
 
18
- latest_news = prepare_df(pd.read_csv('data/results_platts_09082024_clean.csv'), news_categories)
19
- top_news = prepare_df(pd.read_csv('data/topresults_platts_09082024_clean.csv'), news_categories)
 
 
 
 
 
 
 
 
20
 
21
- df_news = pd.concat([latest_news, top_news], ignore_index=True).drop_duplicates(['headline'])
 
22
 
23
  df_mean = pd.DataFrame({
24
  'headline' : ['MEAN OF SELECTED NEWS'],
@@ -32,7 +39,7 @@ df_news_final = pd.concat([df_news, df_mean])
32
 
33
  df_news_final.index = np.arange(1, len(df_news_final) + 1)
34
 
35
- df_news_final
36
 
37
  st.markdown('---')
38
 
 
2
  import pandas as pd
3
  from modules.data_preparation import prepare_df, plot_3dgraph
4
  import numpy as np
5
+ from datetime import datetime
6
 
7
  st.title('Sentiment Analysis for Price Trend Prediction')
8
 
9
  st.header(f'Data based on Platts News and Insights Data')
10
+ st.subheader(f'{datetime.now()}')
 
 
11
 
12
  news_categories = st.multiselect("Select desired Market Movers categories",
13
  ["Macroeconomic & Geopolitics", "Crude Oil", "Light Ends", "Middle Distillates", "Heavy Distillates", "Other"],
14
  ["Macroeconomic & Geopolitics", "Crude Oil"])
15
 
16
+ # date_filter = st.slider(
17
+ # "Date Filter",
18
+ # value=(datetime(2024, 8, 4), datetime(2024,8,9)),
19
+ # format="MM/DD/YY",
20
+ # )
21
+
22
+ #latest_news = prepare_df(pd.read_csv('data/results_platts_09082024_clean.csv'), news_categories)
23
+ #top_news = prepare_df(pd.read_csv('data/topresults_platts_09082024_clean.csv'), news_categories)
24
+
25
+ #df_news = pd.concat([latest_news, top_news], ignore_index=True).drop_duplicates(['headline'])
26
 
27
+ latest_news = prepare_df(pd.read_excel('evaluation.xlsx'), news_categories, date_filter)
28
+ df_news = pd.concat([latest_news], ignore_index=True).drop_duplicates(['headline'])
29
 
30
  df_mean = pd.DataFrame({
31
  'headline' : ['MEAN OF SELECTED NEWS'],
 
39
 
40
  df_news_final.index = np.arange(1, len(df_news_final) + 1)
41
 
42
+ #df_news_final
43
 
44
  st.markdown('---')
45
 
convert_first.csv ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ,headline,topic_verification
2
+ 0,SPAIN DATA: H1 crude imports rise 11% to 1.4 million b/d,Crude Oil
3
+ 1,REFINERY NEWS: Host of Chinese units back from works; Jinling maintenance in Nov-Dec,Macroeconomic & Geopolitics
4
+ 2,REFINERY NEWS ROUNDUP: Mixed runs in Asia-Pacific,Macroeconomic & Geopolitics
5
+ 3,Physical 1%S fuel oil Med-North spread hits record high on competitive bidding in Platts MOC,Middle Distillates
6
+ 4,"Indian ports see Jan-July bunker, STS calls up 64% on year, monsoon hits July demand",Macroeconomic & Geopolitics
7
+ 5,LNG bunker prices in Europe hit 8-month high amid rising demand,Light Ends
8
+ 6,REFINERY NEWS: Wellbred Trading acquires La Nivernaise de Raffinage in France,Heavy Distillates
9
+ 7,Wellbred Trading buys French diesel refinery that runs on used cooking oil,Macroeconomic & Geopolitics
10
+ 8,EU's climate monitor says 2024 'increasingly likely' to be warmest year on record,Macroeconomic & Geopolitics
11
+ 9,"European LPG discount to naphtha narrows, shifting petchem feedstock appetite",Light Ends
12
+ 10,REFINERY NEWS: Thai Oil’s Q2 utilization drops on planned CDU shutdown,Crude Oil
13
+ 11,South Korea’s top oil refiner SK Innovation joins carbon storage project in Australia,Middle Distillates
14
+ 12,CRUDE MOC: Middle East sour crude cash differentials hit month-to-date highs,Crude Oil
15
+ 13,CNOOC approves 100 Bcm of proven reserves at South China Sea gas field,Crude Oil
16
+ 14,"Singapore to work with Shell’s refinery, petrochemicals asset buyers to decarbonize: minister",Light Ends
17
+ 15,BLM federal Montana-Dakotas oil and gas lease sale nets nearly $24 mil: Energynet.com,Middle Distillates
18
+ 16,REFINERY NEWS: Oman's Sohar undergoes unplanned shutdown: sources,Crude Oil
19
+ 17,"OIL FUTURES: Crude prices higher as US stockpiles extend decline, demand concerns cap gains",Crude Oil
20
+ 18,Qatar announces acceptance of Sep LPG cargoes with no cuts or delays heard,Light Ends
21
+ 19,"South Korea aims for full GCC FTA execution by year-end, refiners hopeful for cheaper sour crude",Crude Oil
22
+ 20,"Indonesia sets Minas crude price at $84.95/b for July, rising $3.35/b from June",Crude Oil
23
+ 21,Cathay Pacific H1 2024 passenger traffic rises 36% on year; Hong Kong’s jet fuel demand bolstered,Middle Distillates
24
+ 22,US DATA: Total ULSD stocks near a six-month high as demand continues to fall,Middle Distillates
25
+ 23,US DATA: Product supplied of propane and propylene reach three-month high,Light Ends
26
+ 24,International Seaways focused on replacing aging fleet during second quarter: CEO,Crude Oil
27
+ 25,"Devon Energy's oil output hits all-time record high from Delaware, Eagle Ford operations",Crude Oil
28
+ 26,"Brazil's Prio still waiting on IBAMA license approvals to boost oil, gas output",Crude Oil
29
+ 27,REFINERY NEWS: Delek US sees Q3 refinery utilization dip from record Q2 highs,Light Ends
30
+ 28,"OIL FUTURES: Crude rallies as traders eye tighter US supply, global financial market stabilization",Crude Oil
31
+ 29,"Prompt DFL, CFD contracts rally",Crude Oil
32
+ 30,"REFINERY NEWS: Petroperú sees 2Q refined fuel sales drop 4.4% on year to 93,700 b/d",Middle Distillates
33
+ 31,W&T Offshore nears close of new US Gulf of Mexico drilling joint venture,Crude Oil
34
+ 32,"Improved efficiencies, continued M&A activity to drive growth for Permian Resources",Heavy Distillates
35
+ 33,Mexico's Pemex to explore deposit adjacent to major onshore gas field Quesqui,Middle Distillates
36
+ 34,REFINERY NEWS: Par Pacific reports softer south Rockies results as Midwest barrels spill into region,Middle Distillates
37
+ 35,"Suncor sees improved H2 oil and gas output, completes major Q2 turnarounds",Middle Distillates
38
+ 36,"Brazil's Petrobras, Espirito Santo state to study potential CCUS, hydrogen hubs",Middle Distillates
39
+ 37,"Argentina raises biodiesel, ethanol prices for blending by 1.5% in August",Middle Distillates
40
+ 38,Bolivia offers tax breaks to import equipment for biodiesel plants following fuel shortages,Light Ends
41
+ 39,"US DATA: West Coast fuel oil stocks hit a six-week low, EIA says",Middle Distillates
42
+ 40,Iraq’s SOMO cuts official selling prices for September-loading crude oil for Europe,Crude Oil
43
+ 41,Nigeria's Dangote refinery plans to divest 12.75% stake: ratings agency,Middle Distillates
44
+ 42,REFINERY NEWS: Kazakhstan's Atyrau processes 2.9 mil mt crude in H1,Middle Distillates
45
+ 43,REFINERY NEWS: Thailand's IRPC reports Q2 utilization of 94%,Light Ends
46
+ 44,DNO reports higher Q2 crude production in Iraq's Kurdish region,Crude Oil
47
+ 45,"ADNOC L&S expects ‘strong rates’ in tankers, dry-bulk, containers in 2024",Crude Oil
48
+ 46,WAF crude tanker rates hit 10-month lows amid sluggish inquiry levels,Crude Oil
49
+ 47,Senegal's inaugural crude stream Sangomar to load 3.8 mil barrels in September,Crude Oil
50
+ 48,China's July vegetable oil imports rise 3% on month as buyers replenish domestic stocks,Macroeconomic & Geopolitics
51
+ 49,CRUDE MOC: Middle East sour crude cash differentials rebound,Crude Oil
52
+ 50,OIL FUTURES: Crude oil recovers as financial markets improve,Crude Oil
53
+ 51,"Tullow sees rise in crude output, profits on-year in H1 2024",Crude Oil
54
+ 52,Russia's Taman port June-July oil products throughput up 26% on year,Heavy Distillates
55
+ 53,JAPAN DATA: Oil product exports rise 4.5% on week to 2.42 mil barrels,Crude Oil
56
+ 54,REFINERY NEWS: Petro Rabigh to be upgraded after Aramco takes control,Crude Oil
57
+ 55,Canada's ShaMaran closes acquisition of Atrush oil field,Crude Oil
58
+ 56,CHINA DATA: July natural gas imports rise 5% on year to 10.9 mil mt,Light Ends
59
+ 57,"OIL FUTURES: Crude stabilizes on technical bounce, supply uncertainty",Crude Oil
60
+ 58,JAPAN DATA: Oil product stocks rise 0.8% on week to 55.32 mil barrels,Crude Oil
61
+ 59,Japan cuts Aug 8-14 fuel subsidy by 21% as crude prices drop,Middle Distillates
62
+ 60,JAPAN DATA: Refinery runs rise to 67% over July 28-Aug 3 on higher crude throughput,Light Ends
63
+ 61,Asian reforming spread hits over two-year low as gasoline prices lag naphtha,Light Ends
64
+ 62,Asia medium sulfur gasoil differential weakens as Indonesia demand tapers,Middle Distillates
65
+ 63,"QatarEnergy raises Sep Land, Marine crude OSPs by 45-75 cents/b from Aug",Heavy Distillates
66
+ 64,ADNOC sets Murban Sep OSP $1.28/b higher on month at $83.80/b,Heavy Distillates
67
+ 65,"Diamondback Energy keeps pushing well drilling, completion efficiencies in Q2",Middle Distillates
68
+ 66,"Genel Energy’s oil production from Tawke field increases to 19,510 b/d in 1H 2024",Middle Distillates
69
+ 67,Longer laterals and higher well performance drive Rocky Mountain production: Oneok,Light Ends
70
+ 68,US DOE seeks to buy 3.5 million barrels of crude for delivery to SPR in January 2025,Crude Oil
71
+ 69,"FPSO Maria Quiteria arrives offshore Brazil, to reduce emissions: Petrobras",Middle Distillates
72
+ 70,OIL FUTURES: Crude edges higher as market stabilizes amid Middle Eastern supply concerns,Crude Oil
73
+ 71,"US EIA lowers 2024 oil price outlook by $2/b, but still predicts increases",Crude Oil
74
+ 72,"Shell, BP to fund South Africa's Sapref refinery operations in government takeover",Light Ends
75
+ 73,"Indian Oil cancels tender to build a 10,000 mt/yr renewable hydrogen plant",Light Ends
76
+ 74,"Brazil's Prio July oil equivalent output falls 31.7% on maintenance, shuttered wells",Crude Oil
77
+ 75,Eni follows Ivory Coast discoveries with four new licenses,Crude Oil
78
+ 76,EU DATA: MY 2024-25 soybean meal imports rise 8% on year as of Aug 4,Macroeconomic & Geopolitics
79
+ 77,"Greek PPC to buy a 600 MW Romanian wind farm, portfolio from Macquarie-owned developer",Macroeconomic & Geopolitics
80
+ 78,Vitol to take Italian refiner Saras private after acquiring 51% stake,Macroeconomic & Geopolitics
81
+ 79,Mediterranean sweet crude market shows muted response to Sharara shutdown,Macroeconomic & Geopolitics
82
+ 80,REFINERY NEWS: Vitol acquires 51% in Italian refiner Saras,Macroeconomic & Geopolitics
83
+ 81,Rotterdam LNG bunkers spread with VLSFO narrows to 2024 low,Light Ends
84
+ 82,Argentina’s YPF finds buyers for 15 maturing conventional blocks as it focuses on Vaca Muerta,Heavy Distillates
85
+ 83,REFINERY NEWS ROUNDUP: Nigerian plants in focus,Macroeconomic & Geopolitics
86
+ 84,"REFINERY NEWS: Valero shuts CDU, FCCU at McKee refinery for planned work",Macroeconomic & Geopolitics
87
+ 85,Kazakhstan extends ban on oil products exports by truck for six months,Macroeconomic & Geopolitics
88
+ 86,Physical Hi-Lo spread hits 3 month high amid prompt LSFO demand,Heavy Distillates
89
+ 87,CRUDE MOC: Middle East sour crude cash differentials slip to fresh lows,Crude Oil
90
+ 88,"Nigeria launches new Utapate crude grade, first cargo heads to Spain",Crude Oil
91
+ 89,REFINERY NEWS: Turkish Tupras Q2 output rises 15% on the quarter and year,Middle Distillates
92
+ 90,"CHINA DATA: Independent refineries’ Iranian crude imports fall in July, ESPO inflows rebound",Crude Oil
93
+ 91,Gunvor acquires TotalEnergies' 50% stake in Pakistan retail fuel business,Middle Distillates
94
+ 92,INTERVIEW: Coal to remain a dominant power source in India: Menar MD,Macroeconomic & Geopolitics
95
+ 93,OIL FUTURES: Crude price holds steady as demand expectations cap gains,Crude Oil
96
+ 94,Fujairah’s HSFO August HSFO ex-wharf premiums slip; stocks adequate,Heavy Distillates
97
+ 95,JAPAN DATA: US crude imports more than double in March as Middle East dependency eases,Crude Oil
98
+ 96,Dubai crude futures traded volume on TOCOM rebounds in July from record low,Crude Oil
99
+ 97,Japan's spot electricity price retreats 8% as temperatures ease,Macroeconomic & Geopolitics
100
+ 98,"HONG KONG DATA: June oil product imports surge 32% on month to 226,475 barrels",Crude Oil
101
+ 99,NextDecade signs contract with Bechtel to build Rio Grande LNG expansion,Light Ends
102
+ 100,"Kosmos sees 2024 total output of 90,000 boe/d, despite Q2 operations thorns: CEO",Crude Oil
103
+ 101,"Dated Brent reaches two-month low Aug. 5 as physical, derivatives prices slide on day",Middle Distillates
104
+ 102,"Alaska North Slope crude output up in July, but long-term decline continues",Crude Oil
105
+ 103,Balance-month DFL contract slips to seven-week low in bearish sign for physical crude fundamentals,Crude Oil
106
+ 104,Iraqi Kurdistan officials order crackdown on illegal refineries over pollution,Macroeconomic & Geopolitics
107
+ 105,Rhine barge cargo navigation limits set to kick in amid dryer weather,Middle Distillates
108
+ 106,Bolivia returns diesel supplies to normal following shortages,Middle Distillates
109
+ 107,OCI optimistic about methanol demand driven by decarbonization efforts,Light Ends
110
+ 108,Mitsubishi to supply turbine for 30% hydrogen co-firing in Malaysia power plant,Middle Distillates
111
+ 109,ATLANTIC LNG: Key market indicators for Aug. 5-9,Light Ends
112
+ 110,"Eurobob swap, gas-nap spread falls below 6-month low amid crude selloff",Light Ends
113
+ 111,EMEA PETROCHEMICALS: Key market indicators for Aug 5-9,Light Ends
114
+ 112,EMEA LIGHT ENDS: Key market indicators for Aug 5 – 9,Light Ends
115
+ 113,EUROPE AND AFRICA RESIDUAL AND MARINE FUEL: Key market indicators Aug 5-9,Heavy Distillates
116
+ 114,TURKEY DATA: June crude flows via BTC pipeline up 8.1% on month,Crude Oil
117
+ 115,EMEA AGRICULTURE: Key market indicators for Aug 5–9,Macroeconomic & Geopolitics
118
+ 116,OIL FUTURES: Crude oil faces downward pressure amid wider weakness in financial markets,Crude Oil
119
+ 117,Woodside to acquire OCI’s low carbon ammonia project with CO2 capture in US,Middle Distillates
120
+ 118,Maire secures feasibility study for sustainable aviation fuel project in Indonesia,Middle Distillates
121
+ 119,CRUDE MOC: Middle East sour crude cash differentials plunge on risk-off sentiment,Middle Distillates
122
+ 120,"Zhoushan LSFO storage availability rises for 3rd month in Aug, hits record high",Middle Distillates
123
+ 121,Oil storage in Russia's Rostov region hit by drone strike,Macroeconomic & Geopolitics
124
+ 122,WAF TRACKING: Nigerian crude exports to Netherlands top 5-year high in July,Crude Oil
125
+ 123,"Vietnam’s Hai Linh receives license to import, export LNG",Light Ends
126
+ 124,Japan's Idemitsu could restart Tokuyama steam cracker on Aug 11,Light Ends
127
+ 125,Indonesia's biodiesel output up 12% in H1 on increased domestic mandates: APROBI,Middle Distillates
128
+ 126,CHINA DATA: Independent refiners' July feedstocks imports hit 3-month low at 3.65 mil b/d,Light Ends
129
+ 127,"Singapore’s Aug ex-wharf term LSFO premiums rise, demand moderate",Heavy Distillates
130
+ 128,"OIL FUTURES: Crude slumps as market volatility rages on recession, Middle East risks",Crude Oil
131
+ 129,Pakistan's HSFO exports nearly triple as focus shifts to cheaper power sources,Heavy Distillates
132
+ 130,"TAIWAN DATA: June oil products demand falls 3% on month to 758,139 b/d",Light Ends
133
+ 131,REFINERY NEWS: Japan's Cosmo restarts No. 1 Chiba CDU after glitches,Crude Oil
134
+ 132,ASIA PETROCHEMICALS: Key market indicators for Aug 5-9,Light Ends
135
+ 133,DME Oman crude futures traded volume rises for seventh straight month in July,Crude Oil
136
+ 134,ICE front-month Singapore gasoline swaps open interest rises 14.6% on month in July,Light Ends
137
+ 135,ASIA OCTANE: Key market indicators for Aug 5-9,Light Ends
138
+ 136,ICE Dubai crude futures July total traded volume rises 11.4% on month,Crude Oil
139
+ 137,"Lower-than-expected Aramco Sep OSPs a nod to weak Asian market, OPEC+ cut unwind",Crude Oil
140
+ 138,ASIA CRUDE OIL: Key market indicators for Aug 5-8,Crude Oil
141
+ 139,ASIA LIGHT ENDS: Key market indicators for Aug 5-8,Light Ends
142
+ 140,China fuel oil quotas decline seen supporting Q3 LSFO premiums in Zhoushan,Middle Distillates
143
+ 141,South Korea's short-term diesel demand under pressure on e-commerce firms' bankruptcy,Middle Distillates
144
+ 142,ICE front-month Singapore 10 ppm gasoil swap open interest rebounds 2% on month in July,Middle Distillates
145
+ 143,Saudi Aramco maintains or raises Asia-bound Sep crude OSPs by 10-20 cents/b,Crude Oil
146
+ 144,ASIA MIDDLE DISTILLATES: Key market indicators for Aug 5-8,Middle Distillates
147
+ 145,ICE front-month Singapore HSFO open interest rises 19.6% on month in July,Heavy Distillates
148
+ 146,REFINERY NEWS: Fort Energy at Fujairah ‘remains operational’,Macroeconomic & Geopolitics
149
+ 147,Container ship Groton attacked near Yemen amid growing Middle East security risks,Macroeconomic & Geopolitics
150
+ 148,Oil depot in Russia’s Belgorod region hit by drone strike,Macroeconomic & Geopolitics
data/all_platts_1week_clean.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/dated_brent_allbate.csv ADDED
The diff for this file is too large to render. See raw diff
 
evaluation.xlsx ADDED
Binary file (214 kB). View file
 
modules/__pycache__/data_preparation.cpython-39.pyc CHANGED
Binary files a/modules/__pycache__/data_preparation.cpython-39.pyc and b/modules/__pycache__/data_preparation.cpython-39.pyc differ
 
modules/data_preparation.py CHANGED
@@ -1,5 +1,4 @@
1
  import re
2
- import datetime
3
  import plotly.express as px
4
  import datetime
5
  import plotly.graph_objects as go
@@ -11,13 +10,13 @@ def clean_text(text):
11
  new_text = re.sub(rgx_match, '', new_text)
12
  return new_text
13
 
14
- def prepare_df(df, categories):
15
  try:
16
  df.drop(columns=['Unnamed: 0'], inplace=True)
17
  except:
18
  pass
19
 
20
- df['topic_verification'][(df.headline.str.contains('crude', case=False)) | df.body.str.contains('crude', case=False)] = 'Crude Oil'
21
 
22
  try:
23
  news_data = df[df['topic_verification'].isin(categories)]
@@ -33,35 +32,16 @@ def prepare_df(df, categories):
33
 
34
  # insert column using insert(position,column_name,first_column) function
35
  news_data.insert(0, 'headline', first_column)
36
-
 
 
 
 
37
  except Exception as E:
38
  print(E)
39
 
40
  return news_data
41
 
42
- # def prepare_df(df, category):
43
- # try:
44
- # df.drop(columns=['Unnamed: 0'], inplace=True)
45
- # except:
46
- # pass
47
-
48
- # if category == 'Crude Oil':
49
- # news_data = df[(df['topic_verification'] == 'Crude Oil') | (df['topic_verification'] == 'Macroeconomic & Geopolitics')]
50
- # if category == 'Light Ends':
51
- # news_data = df[(df['topic_verification'] == 'Light Ends')]
52
- # if category == 'Middle Distillates':
53
- # news_data = df[(df['topic_verification'] == 'Middle Distillates')]
54
- # if category == 'Heavy Distillates':
55
- # news_data = df[(df['topic_verification'] == 'Heavy Distillates')]
56
-
57
- # actual_day = datetime.date.today() - datetime.timedelta(days=1)
58
- # pattern_del = actual_day.strftime('%b').upper()
59
-
60
- # filter = news_data['headline'].str.contains(pattern_del)
61
- # news_data = news_data[~filter]
62
-
63
- # return news_data
64
-
65
  def plot_3dgraph(news_data):
66
  fig = px.scatter_3d(news_data,
67
  x='neutral_score',
@@ -77,7 +57,9 @@ def plot_3dgraph(news_data):
77
 
78
  fig.update_layout(
79
  height=600,
80
- title=dict(text=f"Platts News Sentiments ({datetime.datetime.now().strftime('%d/%m/%y')})<br><sup>Hover cursor on a datapoint to show news title</sup>", font=dict(size=35), automargin=False)
 
 
81
  )
82
 
83
  fig.update_traces(textfont_size=8)
 
1
  import re
 
2
  import plotly.express as px
3
  import datetime
4
  import plotly.graph_objects as go
 
10
  new_text = re.sub(rgx_match, '', new_text)
11
  return new_text
12
 
13
+ def prepare_df(df, categories, date_filter):
14
  try:
15
  df.drop(columns=['Unnamed: 0'], inplace=True)
16
  except:
17
  pass
18
 
19
+ #df['topic_verification'][(df.headline.str.contains('crude', case=False)) | df.body.str.contains('crude', case=False)] = 'Crude Oil'
20
 
21
  try:
22
  news_data = df[df['topic_verification'].isin(categories)]
 
32
 
33
  # insert column using insert(position,column_name,first_column) function
34
  news_data.insert(0, 'headline', first_column)
35
+
36
+ news_data['updatedDate'] = news_data['updatedDate'].apply(lambda x: datetime.datetime.strptime(x, '%y/%m/%d %H:%M:%S'))
37
+
38
+ news_data = news_data[(news_data['updatedDate'] >= date_filter[0]) & (news_data['updatedDate'] <= date_filter[1])]
39
+
40
  except Exception as E:
41
  print(E)
42
 
43
  return news_data
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def plot_3dgraph(news_data):
46
  fig = px.scatter_3d(news_data,
47
  x='neutral_score',
 
57
 
58
  fig.update_layout(
59
  height=600,
60
+ title=dict(text=f"Platts News Sentiments ({datetime.datetime.now().strftime('%d/%m/%y')})<br><sup>Hover cursor on a datapoint to show news title</sup>",
61
+ font=dict(size=35),
62
+ automargin=False)
63
  )
64
 
65
  fig.update_traces(textfont_size=8)
rework.xlsx ADDED
Binary file (209 kB). View file
 
test.ipynb ADDED
@@ -0,0 +1,903 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "text/html": [
11
+ "<div>\n",
12
+ "<style scoped>\n",
13
+ " .dataframe tbody tr th:only-of-type {\n",
14
+ " vertical-align: middle;\n",
15
+ " }\n",
16
+ "\n",
17
+ " .dataframe tbody tr th {\n",
18
+ " vertical-align: top;\n",
19
+ " }\n",
20
+ "\n",
21
+ " .dataframe thead th {\n",
22
+ " text-align: right;\n",
23
+ " }\n",
24
+ "</style>\n",
25
+ "<table border=\"1\" class=\"dataframe\">\n",
26
+ " <thead>\n",
27
+ " <tr style=\"text-align: right;\">\n",
28
+ " <th></th>\n",
29
+ " <th>Unnamed: 0</th>\n",
30
+ " <th>body</th>\n",
31
+ " <th>headline</th>\n",
32
+ " <th>updatedDate</th>\n",
33
+ " <th>topic_prediction</th>\n",
34
+ " <th>topic_verification</th>\n",
35
+ " <th>negative_score</th>\n",
36
+ " <th>neutral_score</th>\n",
37
+ " <th>positive_score</th>\n",
38
+ " <th>trend_prediction</th>\n",
39
+ " <th>trend_verification</th>\n",
40
+ " </tr>\n",
41
+ " </thead>\n",
42
+ " <tbody>\n",
43
+ " <tr>\n",
44
+ " <th>0</th>\n",
45
+ " <td>0</td>\n",
46
+ " <td>Spanish crude import volumes increased 11% ye...</td>\n",
47
+ " <td>SPAIN DATA: H1 crude imports rise 11% to 1.4 m...</td>\n",
48
+ " <td>2024-08-08 12:11:55+00:00</td>\n",
49
+ " <td>Crude Oil</td>\n",
50
+ " <td>Crude Oil</td>\n",
51
+ " <td>0.991473</td>\n",
52
+ " <td>0.005524</td>\n",
53
+ " <td>0.519264</td>\n",
54
+ " <td>Bearish</td>\n",
55
+ " <td>Bearish</td>\n",
56
+ " </tr>\n",
57
+ " <tr>\n",
58
+ " <th>1</th>\n",
59
+ " <td>1</td>\n",
60
+ " <td>A number of refineries in China have resumed ...</td>\n",
61
+ " <td>REFINERY NEWS: Host of Chinese units back from...</td>\n",
62
+ " <td>2024-08-08 11:51:12+00:00</td>\n",
63
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
64
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
65
+ " <td>0.417054</td>\n",
66
+ " <td>0.845595</td>\n",
67
+ " <td>0.180685</td>\n",
68
+ " <td>Neutral</td>\n",
69
+ " <td>Neutral</td>\n",
70
+ " </tr>\n",
71
+ " <tr>\n",
72
+ " <th>2</th>\n",
73
+ " <td>2</td>\n",
74
+ " <td>Some refineries in the Asia-Pacific region in...</td>\n",
75
+ " <td>REFINERY NEWS ROUNDUP: Mixed runs in Asia-Pacific</td>\n",
76
+ " <td>2024-08-08 11:50:48+00:00</td>\n",
77
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
78
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
79
+ " <td>0.268708</td>\n",
80
+ " <td>0.044504</td>\n",
81
+ " <td>0.992063</td>\n",
82
+ " <td>Bullish</td>\n",
83
+ " <td>Bullish</td>\n",
84
+ " </tr>\n",
85
+ " <tr>\n",
86
+ " <th>3</th>\n",
87
+ " <td>3</td>\n",
88
+ " <td>The physical low sulfur (1%S) fuel oil Med-No...</td>\n",
89
+ " <td>Physical 1%S fuel oil Med-North spread hits re...</td>\n",
90
+ " <td>2024-08-08 11:28:20+00:00</td>\n",
91
+ " <td>Middle Distillates</td>\n",
92
+ " <td>Middle Distillates</td>\n",
93
+ " <td>0.951985</td>\n",
94
+ " <td>0.009613</td>\n",
95
+ " <td>0.822905</td>\n",
96
+ " <td>Bearish</td>\n",
97
+ " <td>Bearish</td>\n",
98
+ " </tr>\n",
99
+ " <tr>\n",
100
+ " <th>4</th>\n",
101
+ " <td>4</td>\n",
102
+ " <td>Bunkering activity in India has experienced s...</td>\n",
103
+ " <td>Indian ports see Jan-July bunker, STS calls up...</td>\n",
104
+ " <td>2024-08-08 11:27:15+00:00</td>\n",
105
+ " <td>Heavy Distillates</td>\n",
106
+ " <td>Heavy Distillates</td>\n",
107
+ " <td>0.098844</td>\n",
108
+ " <td>0.059348</td>\n",
109
+ " <td>0.997325</td>\n",
110
+ " <td>Bullish</td>\n",
111
+ " <td>Bullish</td>\n",
112
+ " </tr>\n",
113
+ " <tr>\n",
114
+ " <th>...</th>\n",
115
+ " <td>...</td>\n",
116
+ " <td>...</td>\n",
117
+ " <td>...</td>\n",
118
+ " <td>...</td>\n",
119
+ " <td>...</td>\n",
120
+ " <td>...</td>\n",
121
+ " <td>...</td>\n",
122
+ " <td>...</td>\n",
123
+ " <td>...</td>\n",
124
+ " <td>...</td>\n",
125
+ " <td>...</td>\n",
126
+ " </tr>\n",
127
+ " <tr>\n",
128
+ " <th>153</th>\n",
129
+ " <td>367</td>\n",
130
+ " <td>The Asian middle distillates complex could be...</td>\n",
131
+ " <td>ASIA MIDDLE DISTILLATES: Key market indicators...</td>\n",
132
+ " <td>2024-08-05 01:50:17+00:00</td>\n",
133
+ " <td>Middle Distillates</td>\n",
134
+ " <td>Middle Distillates</td>\n",
135
+ " <td>0.034932</td>\n",
136
+ " <td>0.310855</td>\n",
137
+ " <td>0.993474</td>\n",
138
+ " <td>Bullish</td>\n",
139
+ " <td>Bullish</td>\n",
140
+ " </tr>\n",
141
+ " <tr>\n",
142
+ " <th>154</th>\n",
143
+ " <td>368</td>\n",
144
+ " <td>The combined open interest for front-month Si...</td>\n",
145
+ " <td>ICE front-month Singapore HSFO open interest r...</td>\n",
146
+ " <td>2024-08-05 01:26:07+00:00</td>\n",
147
+ " <td>Heavy Distillates</td>\n",
148
+ " <td>Heavy Distillates</td>\n",
149
+ " <td>0.368089</td>\n",
150
+ " <td>0.017483</td>\n",
151
+ " <td>0.994805</td>\n",
152
+ " <td>Bullish</td>\n",
153
+ " <td>Bullish</td>\n",
154
+ " </tr>\n",
155
+ " <tr>\n",
156
+ " <th>155</th>\n",
157
+ " <td>369</td>\n",
158
+ " <td>Production will be increasing “in the near fu...</td>\n",
159
+ " <td>REFINERY NEWS: Fort Energy at Fujairah ‘remain...</td>\n",
160
+ " <td>2024-08-05 00:45:56+00:00</td>\n",
161
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
162
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
163
+ " <td>0.775953</td>\n",
164
+ " <td>0.520116</td>\n",
165
+ " <td>0.177664</td>\n",
166
+ " <td>Bearish</td>\n",
167
+ " <td>Bearish</td>\n",
168
+ " </tr>\n",
169
+ " <tr>\n",
170
+ " <th>156</th>\n",
171
+ " <td>370</td>\n",
172
+ " <td>Container ship Groton was attacked 125 nautic...</td>\n",
173
+ " <td>Container ship Groton attacked near Yemen amid...</td>\n",
174
+ " <td>2024-08-04 12:25:30+00:00</td>\n",
175
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
176
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
177
+ " <td>0.245594</td>\n",
178
+ " <td>0.044671</td>\n",
179
+ " <td>0.994086</td>\n",
180
+ " <td>Bullish</td>\n",
181
+ " <td>Bullish</td>\n",
182
+ " </tr>\n",
183
+ " <tr>\n",
184
+ " <th>157</th>\n",
185
+ " <td>371</td>\n",
186
+ " <td>A drone strike on an oil depot in Russia’s Be...</td>\n",
187
+ " <td>Oil depot in Russia’s Belgorod region hit by d...</td>\n",
188
+ " <td>2024-08-04 10:14:50+00:00</td>\n",
189
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
190
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
191
+ " <td>0.243901</td>\n",
192
+ " <td>0.951199</td>\n",
193
+ " <td>0.129076</td>\n",
194
+ " <td>Neutral</td>\n",
195
+ " <td>Neutral</td>\n",
196
+ " </tr>\n",
197
+ " </tbody>\n",
198
+ "</table>\n",
199
+ "<p>158 rows × 11 columns</p>\n",
200
+ "</div>"
201
+ ],
202
+ "text/plain": [
203
+ " Unnamed: 0 body \\\n",
204
+ "0 0 Spanish crude import volumes increased 11% ye... \n",
205
+ "1 1 A number of refineries in China have resumed ... \n",
206
+ "2 2 Some refineries in the Asia-Pacific region in... \n",
207
+ "3 3 The physical low sulfur (1%S) fuel oil Med-No... \n",
208
+ "4 4 Bunkering activity in India has experienced s... \n",
209
+ ".. ... ... \n",
210
+ "153 367 The Asian middle distillates complex could be... \n",
211
+ "154 368 The combined open interest for front-month Si... \n",
212
+ "155 369 Production will be increasing “in the near fu... \n",
213
+ "156 370 Container ship Groton was attacked 125 nautic... \n",
214
+ "157 371 A drone strike on an oil depot in Russia’s Be... \n",
215
+ "\n",
216
+ " headline \\\n",
217
+ "0 SPAIN DATA: H1 crude imports rise 11% to 1.4 m... \n",
218
+ "1 REFINERY NEWS: Host of Chinese units back from... \n",
219
+ "2 REFINERY NEWS ROUNDUP: Mixed runs in Asia-Pacific \n",
220
+ "3 Physical 1%S fuel oil Med-North spread hits re... \n",
221
+ "4 Indian ports see Jan-July bunker, STS calls up... \n",
222
+ ".. ... \n",
223
+ "153 ASIA MIDDLE DISTILLATES: Key market indicators... \n",
224
+ "154 ICE front-month Singapore HSFO open interest r... \n",
225
+ "155 REFINERY NEWS: Fort Energy at Fujairah ‘remain... \n",
226
+ "156 Container ship Groton attacked near Yemen amid... \n",
227
+ "157 Oil depot in Russia’s Belgorod region hit by d... \n",
228
+ "\n",
229
+ " updatedDate topic_prediction \\\n",
230
+ "0 2024-08-08 12:11:55+00:00 Crude Oil \n",
231
+ "1 2024-08-08 11:51:12+00:00 Macroeconomic & Geopolitics \n",
232
+ "2 2024-08-08 11:50:48+00:00 Macroeconomic & Geopolitics \n",
233
+ "3 2024-08-08 11:28:20+00:00 Middle Distillates \n",
234
+ "4 2024-08-08 11:27:15+00:00 Heavy Distillates \n",
235
+ ".. ... ... \n",
236
+ "153 2024-08-05 01:50:17+00:00 Middle Distillates \n",
237
+ "154 2024-08-05 01:26:07+00:00 Heavy Distillates \n",
238
+ "155 2024-08-05 00:45:56+00:00 Macroeconomic & Geopolitics \n",
239
+ "156 2024-08-04 12:25:30+00:00 Macroeconomic & Geopolitics \n",
240
+ "157 2024-08-04 10:14:50+00:00 Macroeconomic & Geopolitics \n",
241
+ "\n",
242
+ " topic_verification negative_score neutral_score \\\n",
243
+ "0 Crude Oil 0.991473 0.005524 \n",
244
+ "1 Macroeconomic & Geopolitics 0.417054 0.845595 \n",
245
+ "2 Macroeconomic & Geopolitics 0.268708 0.044504 \n",
246
+ "3 Middle Distillates 0.951985 0.009613 \n",
247
+ "4 Heavy Distillates 0.098844 0.059348 \n",
248
+ ".. ... ... ... \n",
249
+ "153 Middle Distillates 0.034932 0.310855 \n",
250
+ "154 Heavy Distillates 0.368089 0.017483 \n",
251
+ "155 Macroeconomic & Geopolitics 0.775953 0.520116 \n",
252
+ "156 Macroeconomic & Geopolitics 0.245594 0.044671 \n",
253
+ "157 Macroeconomic & Geopolitics 0.243901 0.951199 \n",
254
+ "\n",
255
+ " positive_score trend_prediction trend_verification \n",
256
+ "0 0.519264 Bearish Bearish \n",
257
+ "1 0.180685 Neutral Neutral \n",
258
+ "2 0.992063 Bullish Bullish \n",
259
+ "3 0.822905 Bearish Bearish \n",
260
+ "4 0.997325 Bullish Bullish \n",
261
+ ".. ... ... ... \n",
262
+ "153 0.993474 Bullish Bullish \n",
263
+ "154 0.994805 Bullish Bullish \n",
264
+ "155 0.177664 Bearish Bearish \n",
265
+ "156 0.994086 Bullish Bullish \n",
266
+ "157 0.129076 Neutral Neutral \n",
267
+ "\n",
268
+ "[158 rows x 11 columns]"
269
+ ]
270
+ },
271
+ "execution_count": 1,
272
+ "metadata": {},
273
+ "output_type": "execute_result"
274
+ }
275
+ ],
276
+ "source": [
277
+ "import pandas as pd\n",
278
+ "import os\n",
279
+ "\n",
280
+ "test = pd.read_csv(os.path.join('data','all_platts_1week_clean.csv'))\n",
281
+ "test"
282
+ ]
283
+ },
284
+ {
285
+ "cell_type": "code",
286
+ "execution_count": 4,
287
+ "metadata": {},
288
+ "outputs": [],
289
+ "source": [
290
+ "test.to_excel('evaluation.xlsx')"
291
+ ]
292
+ },
293
+ {
294
+ "cell_type": "code",
295
+ "execution_count": 9,
296
+ "metadata": {},
297
+ "outputs": [],
298
+ "source": [
299
+ "train = test.drop(columns=['Unnamed: 0', 'body', 'updatedDate', 'topic_prediction', 'negative_score', 'neutral_score', 'positive_score', 'trend_prediction', 'trend_verification'])"
300
+ ]
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": 11,
305
+ "metadata": {},
306
+ "outputs": [],
307
+ "source": [
308
+ "train.to_csv('convert_first.csv')"
309
+ ]
310
+ },
311
+ {
312
+ "cell_type": "code",
313
+ "execution_count": 12,
314
+ "metadata": {},
315
+ "outputs": [],
316
+ "source": [
317
+ "dated_brent = pd.read_csv('data/dated_brent_allbate.csv')"
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "execution_count": 13,
323
+ "metadata": {},
324
+ "outputs": [],
325
+ "source": [
326
+ "dated_brent = dated_brent[dated_brent['assessDate'] >= '2024-08-04']\n",
327
+ "dated_brent = dated_brent[dated_brent['bate']=='c']"
328
+ ]
329
+ },
330
+ {
331
+ "cell_type": "code",
332
+ "execution_count": 14,
333
+ "metadata": {},
334
+ "outputs": [
335
+ {
336
+ "data": {
337
+ "text/html": [
338
+ "<div>\n",
339
+ "<style scoped>\n",
340
+ " .dataframe tbody tr th:only-of-type {\n",
341
+ " vertical-align: middle;\n",
342
+ " }\n",
343
+ "\n",
344
+ " .dataframe tbody tr th {\n",
345
+ " vertical-align: top;\n",
346
+ " }\n",
347
+ "\n",
348
+ " .dataframe thead th {\n",
349
+ " text-align: right;\n",
350
+ " }\n",
351
+ "</style>\n",
352
+ "<table border=\"1\" class=\"dataframe\">\n",
353
+ " <thead>\n",
354
+ " <tr style=\"text-align: right;\">\n",
355
+ " <th></th>\n",
356
+ " <th>Unnamed: 0</th>\n",
357
+ " <th>bate</th>\n",
358
+ " <th>value</th>\n",
359
+ " <th>assessDate</th>\n",
360
+ " <th>isCorrected</th>\n",
361
+ " <th>modDate</th>\n",
362
+ " <th>symbol</th>\n",
363
+ " </tr>\n",
364
+ " </thead>\n",
365
+ " <tbody>\n",
366
+ " <tr>\n",
367
+ " <th>8019</th>\n",
368
+ " <td>8019</td>\n",
369
+ " <td>c</td>\n",
370
+ " <td>76.700</td>\n",
371
+ " <td>2024-08-05</td>\n",
372
+ " <td>N</td>\n",
373
+ " <td>2024-08-05 17:55:34</td>\n",
374
+ " <td>PCAAS00</td>\n",
375
+ " </tr>\n",
376
+ " <tr>\n",
377
+ " <th>8022</th>\n",
378
+ " <td>8022</td>\n",
379
+ " <td>c</td>\n",
380
+ " <td>76.275</td>\n",
381
+ " <td>2024-08-06</td>\n",
382
+ " <td>N</td>\n",
383
+ " <td>2024-08-06 17:48:25</td>\n",
384
+ " <td>PCAAS00</td>\n",
385
+ " </tr>\n",
386
+ " <tr>\n",
387
+ " <th>8025</th>\n",
388
+ " <td>8025</td>\n",
389
+ " <td>c</td>\n",
390
+ " <td>79.910</td>\n",
391
+ " <td>2024-08-07</td>\n",
392
+ " <td>N</td>\n",
393
+ " <td>2024-08-07 19:01:31</td>\n",
394
+ " <td>PCAAS00</td>\n",
395
+ " </tr>\n",
396
+ " <tr>\n",
397
+ " <th>8028</th>\n",
398
+ " <td>8028</td>\n",
399
+ " <td>c</td>\n",
400
+ " <td>81.100</td>\n",
401
+ " <td>2024-08-08</td>\n",
402
+ " <td>N</td>\n",
403
+ " <td>2024-08-08 18:15:44</td>\n",
404
+ " <td>PCAAS00</td>\n",
405
+ " </tr>\n",
406
+ " <tr>\n",
407
+ " <th>8031</th>\n",
408
+ " <td>8031</td>\n",
409
+ " <td>c</td>\n",
410
+ " <td>81.615</td>\n",
411
+ " <td>2024-08-09</td>\n",
412
+ " <td>N</td>\n",
413
+ " <td>2024-08-09 18:00:52</td>\n",
414
+ " <td>PCAAS00</td>\n",
415
+ " </tr>\n",
416
+ " </tbody>\n",
417
+ "</table>\n",
418
+ "</div>"
419
+ ],
420
+ "text/plain": [
421
+ " Unnamed: 0 bate value assessDate isCorrected modDate \\\n",
422
+ "8019 8019 c 76.700 2024-08-05 N 2024-08-05 17:55:34 \n",
423
+ "8022 8022 c 76.275 2024-08-06 N 2024-08-06 17:48:25 \n",
424
+ "8025 8025 c 79.910 2024-08-07 N 2024-08-07 19:01:31 \n",
425
+ "8028 8028 c 81.100 2024-08-08 N 2024-08-08 18:15:44 \n",
426
+ "8031 8031 c 81.615 2024-08-09 N 2024-08-09 18:00:52 \n",
427
+ "\n",
428
+ " symbol \n",
429
+ "8019 PCAAS00 \n",
430
+ "8022 PCAAS00 \n",
431
+ "8025 PCAAS00 \n",
432
+ "8028 PCAAS00 \n",
433
+ "8031 PCAAS00 "
434
+ ]
435
+ },
436
+ "execution_count": 14,
437
+ "metadata": {},
438
+ "output_type": "execute_result"
439
+ }
440
+ ],
441
+ "source": [
442
+ "dated_brent"
443
+ ]
444
+ },
445
+ {
446
+ "cell_type": "code",
447
+ "execution_count": 1,
448
+ "metadata": {},
449
+ "outputs": [
450
+ {
451
+ "data": {
452
+ "text/html": [
453
+ "<div>\n",
454
+ "<style scoped>\n",
455
+ " .dataframe tbody tr th:only-of-type {\n",
456
+ " vertical-align: middle;\n",
457
+ " }\n",
458
+ "\n",
459
+ " .dataframe tbody tr th {\n",
460
+ " vertical-align: top;\n",
461
+ " }\n",
462
+ "\n",
463
+ " .dataframe thead th {\n",
464
+ " text-align: right;\n",
465
+ " }\n",
466
+ "</style>\n",
467
+ "<table border=\"1\" class=\"dataframe\">\n",
468
+ " <thead>\n",
469
+ " <tr style=\"text-align: right;\">\n",
470
+ " <th></th>\n",
471
+ " <th>Unnamed: 0</th>\n",
472
+ " <th>body</th>\n",
473
+ " <th>headline</th>\n",
474
+ " <th>updatedDate</th>\n",
475
+ " <th>topic_prediction</th>\n",
476
+ " <th>topic_verification</th>\n",
477
+ " <th>negative_score</th>\n",
478
+ " <th>neutral_score</th>\n",
479
+ " <th>positive_score</th>\n",
480
+ " <th>trend_prediction</th>\n",
481
+ " <th>trend_verification</th>\n",
482
+ " <th>Unnamed: 11</th>\n",
483
+ " <th>Price Data</th>\n",
484
+ " <th>Unnamed: 13</th>\n",
485
+ " <th>Unnamed: 14</th>\n",
486
+ " <th>Bearish Prediction</th>\n",
487
+ " <th>Neutral Prediction</th>\n",
488
+ " <th>Bullish Prediction</th>\n",
489
+ " <th>Prediction (All News)</th>\n",
490
+ " </tr>\n",
491
+ " </thead>\n",
492
+ " <tbody>\n",
493
+ " <tr>\n",
494
+ " <th>0</th>\n",
495
+ " <td>0</td>\n",
496
+ " <td>Spanish crude import volumes increased 11% ye...</td>\n",
497
+ " <td>SPAIN DATA: H1 crude imports rise 11% to 1.4 m...</td>\n",
498
+ " <td>2024-08-08 12:11:55+00:00</td>\n",
499
+ " <td>Crude Oil</td>\n",
500
+ " <td>Crude Oil</td>\n",
501
+ " <td>0.991473</td>\n",
502
+ " <td>0.005524</td>\n",
503
+ " <td>0.519264</td>\n",
504
+ " <td>Bearish</td>\n",
505
+ " <td>Bearish</td>\n",
506
+ " <td>NaN</td>\n",
507
+ " <td>2024-08-05</td>\n",
508
+ " <td>76.7</td>\n",
509
+ " <td>NaN</td>\n",
510
+ " <td>0.575591</td>\n",
511
+ " <td>0.183341</td>\n",
512
+ " <td>0.748952</td>\n",
513
+ " <td>Bullish</td>\n",
514
+ " </tr>\n",
515
+ " <tr>\n",
516
+ " <th>1</th>\n",
517
+ " <td>1</td>\n",
518
+ " <td>A number of refineries in China have resumed ...</td>\n",
519
+ " <td>REFINERY NEWS: Host of Chinese units back from...</td>\n",
520
+ " <td>2024-08-08 11:51:12+00:00</td>\n",
521
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
522
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
523
+ " <td>0.417054</td>\n",
524
+ " <td>0.845595</td>\n",
525
+ " <td>0.180685</td>\n",
526
+ " <td>Neutral</td>\n",
527
+ " <td>Neutral</td>\n",
528
+ " <td>NaN</td>\n",
529
+ " <td>2024-08-06</td>\n",
530
+ " <td>76.275</td>\n",
531
+ " <td>NaN</td>\n",
532
+ " <td>0.653413</td>\n",
533
+ " <td>0.137436</td>\n",
534
+ " <td>0.672831</td>\n",
535
+ " <td>Bullish</td>\n",
536
+ " </tr>\n",
537
+ " <tr>\n",
538
+ " <th>2</th>\n",
539
+ " <td>2</td>\n",
540
+ " <td>Some refineries in the Asia-Pacific region in...</td>\n",
541
+ " <td>REFINERY NEWS ROUNDUP: Mixed runs in Asia-Pacific</td>\n",
542
+ " <td>2024-08-08 11:50:48+00:00</td>\n",
543
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
544
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
545
+ " <td>0.268708</td>\n",
546
+ " <td>0.044504</td>\n",
547
+ " <td>0.992063</td>\n",
548
+ " <td>Bullish</td>\n",
549
+ " <td>Bullish</td>\n",
550
+ " <td>NaN</td>\n",
551
+ " <td>2024-08-07</td>\n",
552
+ " <td>79.91</td>\n",
553
+ " <td>NaN</td>\n",
554
+ " <td>0.653586</td>\n",
555
+ " <td>0.096258</td>\n",
556
+ " <td>0.727852</td>\n",
557
+ " <td>Bullish</td>\n",
558
+ " </tr>\n",
559
+ " <tr>\n",
560
+ " <th>3</th>\n",
561
+ " <td>3</td>\n",
562
+ " <td>The physical low sulfur (1%S) fuel oil Med-No...</td>\n",
563
+ " <td>Physical 1%S fuel oil Med-North spread hits re...</td>\n",
564
+ " <td>2024-08-08 11:28:20+00:00</td>\n",
565
+ " <td>Middle Distillates</td>\n",
566
+ " <td>Middle Distillates</td>\n",
567
+ " <td>0.951985</td>\n",
568
+ " <td>0.009613</td>\n",
569
+ " <td>0.822905</td>\n",
570
+ " <td>Bearish</td>\n",
571
+ " <td>Bearish</td>\n",
572
+ " <td>NaN</td>\n",
573
+ " <td>2024-08-08</td>\n",
574
+ " <td>81.1</td>\n",
575
+ " <td>NaN</td>\n",
576
+ " <td>0.499159</td>\n",
577
+ " <td>0.209326</td>\n",
578
+ " <td>0.750482</td>\n",
579
+ " <td>Bullish</td>\n",
580
+ " </tr>\n",
581
+ " <tr>\n",
582
+ " <th>4</th>\n",
583
+ " <td>4</td>\n",
584
+ " <td>Bunkering activity in India has experienced s...</td>\n",
585
+ " <td>Indian ports see Jan-July bunker, STS calls up...</td>\n",
586
+ " <td>2024-08-08 11:27:15+00:00</td>\n",
587
+ " <td>Heavy Distillates</td>\n",
588
+ " <td>Heavy Distillates</td>\n",
589
+ " <td>0.098844</td>\n",
590
+ " <td>0.059348</td>\n",
591
+ " <td>0.997325</td>\n",
592
+ " <td>Bullish</td>\n",
593
+ " <td>Bullish</td>\n",
594
+ " <td>NaN</td>\n",
595
+ " <td>2024-08-09</td>\n",
596
+ " <td>81.615</td>\n",
597
+ " <td>NaN</td>\n",
598
+ " <td>NaN</td>\n",
599
+ " <td>NaN</td>\n",
600
+ " <td>NaN</td>\n",
601
+ " <td>NaN</td>\n",
602
+ " </tr>\n",
603
+ " <tr>\n",
604
+ " <th>...</th>\n",
605
+ " <td>...</td>\n",
606
+ " <td>...</td>\n",
607
+ " <td>...</td>\n",
608
+ " <td>...</td>\n",
609
+ " <td>...</td>\n",
610
+ " <td>...</td>\n",
611
+ " <td>...</td>\n",
612
+ " <td>...</td>\n",
613
+ " <td>...</td>\n",
614
+ " <td>...</td>\n",
615
+ " <td>...</td>\n",
616
+ " <td>...</td>\n",
617
+ " <td>...</td>\n",
618
+ " <td>...</td>\n",
619
+ " <td>...</td>\n",
620
+ " <td>...</td>\n",
621
+ " <td>...</td>\n",
622
+ " <td>...</td>\n",
623
+ " <td>...</td>\n",
624
+ " </tr>\n",
625
+ " <tr>\n",
626
+ " <th>136</th>\n",
627
+ " <td>136</td>\n",
628
+ " <td>Saudi Aramco maintained or raised the Asia-bo...</td>\n",
629
+ " <td>Saudi Aramco maintains or raises Asia-bound Se...</td>\n",
630
+ " <td>2024-08-05 01:59:16+00:00</td>\n",
631
+ " <td>Crude Oil</td>\n",
632
+ " <td>Crude Oil</td>\n",
633
+ " <td>0.154642</td>\n",
634
+ " <td>0.032633</td>\n",
635
+ " <td>0.997273</td>\n",
636
+ " <td>Bullish</td>\n",
637
+ " <td>Bullish</td>\n",
638
+ " <td>NaN</td>\n",
639
+ " <td>NaT</td>\n",
640
+ " <td>NaN</td>\n",
641
+ " <td>NaN</td>\n",
642
+ " <td>NaN</td>\n",
643
+ " <td>NaN</td>\n",
644
+ " <td>NaN</td>\n",
645
+ " <td>NaN</td>\n",
646
+ " </tr>\n",
647
+ " <tr>\n",
648
+ " <th>137</th>\n",
649
+ " <td>137</td>\n",
650
+ " <td>The combined open interest for front-month Si...</td>\n",
651
+ " <td>ICE front-month Singapore HSFO open interest r...</td>\n",
652
+ " <td>2024-08-05 01:26:07+00:00</td>\n",
653
+ " <td>Heavy Distillates</td>\n",
654
+ " <td>Heavy Distillates</td>\n",
655
+ " <td>0.368089</td>\n",
656
+ " <td>0.017483</td>\n",
657
+ " <td>0.994805</td>\n",
658
+ " <td>Bullish</td>\n",
659
+ " <td>Bullish</td>\n",
660
+ " <td>NaN</td>\n",
661
+ " <td>NaT</td>\n",
662
+ " <td>NaN</td>\n",
663
+ " <td>NaN</td>\n",
664
+ " <td>NaN</td>\n",
665
+ " <td>NaN</td>\n",
666
+ " <td>NaN</td>\n",
667
+ " <td>NaN</td>\n",
668
+ " </tr>\n",
669
+ " <tr>\n",
670
+ " <th>138</th>\n",
671
+ " <td>138</td>\n",
672
+ " <td>Production will be increasing “in the near fu...</td>\n",
673
+ " <td>REFINERY NEWS: Fort Energy at Fujairah ‘remain...</td>\n",
674
+ " <td>2024-08-05 00:45:56+00:00</td>\n",
675
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
676
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
677
+ " <td>0.775953</td>\n",
678
+ " <td>0.520116</td>\n",
679
+ " <td>0.177664</td>\n",
680
+ " <td>Bearish</td>\n",
681
+ " <td>Bearish</td>\n",
682
+ " <td>NaN</td>\n",
683
+ " <td>NaT</td>\n",
684
+ " <td>NaN</td>\n",
685
+ " <td>NaN</td>\n",
686
+ " <td>NaN</td>\n",
687
+ " <td>NaN</td>\n",
688
+ " <td>NaN</td>\n",
689
+ " <td>NaN</td>\n",
690
+ " </tr>\n",
691
+ " <tr>\n",
692
+ " <th>139</th>\n",
693
+ " <td>139</td>\n",
694
+ " <td>Container ship Groton was attacked 125 nautic...</td>\n",
695
+ " <td>Container ship Groton attacked near Yemen amid...</td>\n",
696
+ " <td>2024-08-04 12:25:30+00:00</td>\n",
697
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
698
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
699
+ " <td>0.245594</td>\n",
700
+ " <td>0.044671</td>\n",
701
+ " <td>0.994086</td>\n",
702
+ " <td>Bullish</td>\n",
703
+ " <td>Bullish</td>\n",
704
+ " <td>NaN</td>\n",
705
+ " <td>NaT</td>\n",
706
+ " <td>NaN</td>\n",
707
+ " <td>NaN</td>\n",
708
+ " <td>NaN</td>\n",
709
+ " <td>NaN</td>\n",
710
+ " <td>NaN</td>\n",
711
+ " <td>NaN</td>\n",
712
+ " </tr>\n",
713
+ " <tr>\n",
714
+ " <th>140</th>\n",
715
+ " <td>140</td>\n",
716
+ " <td>A drone strike on an oil depot in Russia’s Be...</td>\n",
717
+ " <td>Oil depot in Russia’s Belgorod region hit by d...</td>\n",
718
+ " <td>2024-08-04 10:14:50+00:00</td>\n",
719
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
720
+ " <td>Macroeconomic &amp; Geopolitics</td>\n",
721
+ " <td>0.243901</td>\n",
722
+ " <td>0.951199</td>\n",
723
+ " <td>0.129076</td>\n",
724
+ " <td>Neutral</td>\n",
725
+ " <td>Neutral</td>\n",
726
+ " <td>NaN</td>\n",
727
+ " <td>NaT</td>\n",
728
+ " <td>NaN</td>\n",
729
+ " <td>NaN</td>\n",
730
+ " <td>NaN</td>\n",
731
+ " <td>NaN</td>\n",
732
+ " <td>NaN</td>\n",
733
+ " <td>NaN</td>\n",
734
+ " </tr>\n",
735
+ " </tbody>\n",
736
+ "</table>\n",
737
+ "<p>141 rows × 19 columns</p>\n",
738
+ "</div>"
739
+ ],
740
+ "text/plain": [
741
+ " Unnamed: 0 body \\\n",
742
+ "0 0 Spanish crude import volumes increased 11% ye... \n",
743
+ "1 1 A number of refineries in China have resumed ... \n",
744
+ "2 2 Some refineries in the Asia-Pacific region in... \n",
745
+ "3 3 The physical low sulfur (1%S) fuel oil Med-No... \n",
746
+ "4 4 Bunkering activity in India has experienced s... \n",
747
+ ".. ... ... \n",
748
+ "136 136 Saudi Aramco maintained or raised the Asia-bo... \n",
749
+ "137 137 The combined open interest for front-month Si... \n",
750
+ "138 138 Production will be increasing “in the near fu... \n",
751
+ "139 139 Container ship Groton was attacked 125 nautic... \n",
752
+ "140 140 A drone strike on an oil depot in Russia’s Be... \n",
753
+ "\n",
754
+ " headline \\\n",
755
+ "0 SPAIN DATA: H1 crude imports rise 11% to 1.4 m... \n",
756
+ "1 REFINERY NEWS: Host of Chinese units back from... \n",
757
+ "2 REFINERY NEWS ROUNDUP: Mixed runs in Asia-Pacific \n",
758
+ "3 Physical 1%S fuel oil Med-North spread hits re... \n",
759
+ "4 Indian ports see Jan-July bunker, STS calls up... \n",
760
+ ".. ... \n",
761
+ "136 Saudi Aramco maintains or raises Asia-bound Se... \n",
762
+ "137 ICE front-month Singapore HSFO open interest r... \n",
763
+ "138 REFINERY NEWS: Fort Energy at Fujairah ‘remain... \n",
764
+ "139 Container ship Groton attacked near Yemen amid... \n",
765
+ "140 Oil depot in Russia’s Belgorod region hit by d... \n",
766
+ "\n",
767
+ " updatedDate topic_prediction \\\n",
768
+ "0 2024-08-08 12:11:55+00:00 Crude Oil \n",
769
+ "1 2024-08-08 11:51:12+00:00 Macroeconomic & Geopolitics \n",
770
+ "2 2024-08-08 11:50:48+00:00 Macroeconomic & Geopolitics \n",
771
+ "3 2024-08-08 11:28:20+00:00 Middle Distillates \n",
772
+ "4 2024-08-08 11:27:15+00:00 Heavy Distillates \n",
773
+ ".. ... ... \n",
774
+ "136 2024-08-05 01:59:16+00:00 Crude Oil \n",
775
+ "137 2024-08-05 01:26:07+00:00 Heavy Distillates \n",
776
+ "138 2024-08-05 00:45:56+00:00 Macroeconomic & Geopolitics \n",
777
+ "139 2024-08-04 12:25:30+00:00 Macroeconomic & Geopolitics \n",
778
+ "140 2024-08-04 10:14:50+00:00 Macroeconomic & Geopolitics \n",
779
+ "\n",
780
+ " topic_verification negative_score neutral_score \\\n",
781
+ "0 Crude Oil 0.991473 0.005524 \n",
782
+ "1 Macroeconomic & Geopolitics 0.417054 0.845595 \n",
783
+ "2 Macroeconomic & Geopolitics 0.268708 0.044504 \n",
784
+ "3 Middle Distillates 0.951985 0.009613 \n",
785
+ "4 Heavy Distillates 0.098844 0.059348 \n",
786
+ ".. ... ... ... \n",
787
+ "136 Crude Oil 0.154642 0.032633 \n",
788
+ "137 Heavy Distillates 0.368089 0.017483 \n",
789
+ "138 Macroeconomic & Geopolitics 0.775953 0.520116 \n",
790
+ "139 Macroeconomic & Geopolitics 0.245594 0.044671 \n",
791
+ "140 Macroeconomic & Geopolitics 0.243901 0.951199 \n",
792
+ "\n",
793
+ " positive_score trend_prediction trend_verification Unnamed: 11 \\\n",
794
+ "0 0.519264 Bearish Bearish NaN \n",
795
+ "1 0.180685 Neutral Neutral NaN \n",
796
+ "2 0.992063 Bullish Bullish NaN \n",
797
+ "3 0.822905 Bearish Bearish NaN \n",
798
+ "4 0.997325 Bullish Bullish NaN \n",
799
+ ".. ... ... ... ... \n",
800
+ "136 0.997273 Bullish Bullish NaN \n",
801
+ "137 0.994805 Bullish Bullish NaN \n",
802
+ "138 0.177664 Bearish Bearish NaN \n",
803
+ "139 0.994086 Bullish Bullish NaN \n",
804
+ "140 0.129076 Neutral Neutral NaN \n",
805
+ "\n",
806
+ " Price Data Unnamed: 13 Unnamed: 14 Bearish Prediction \\\n",
807
+ "0 2024-08-05 76.7 NaN 0.575591 \n",
808
+ "1 2024-08-06 76.275 NaN 0.653413 \n",
809
+ "2 2024-08-07 79.91 NaN 0.653586 \n",
810
+ "3 2024-08-08 81.1 NaN 0.499159 \n",
811
+ "4 2024-08-09 81.615 NaN NaN \n",
812
+ ".. ... ... ... ... \n",
813
+ "136 NaT NaN NaN NaN \n",
814
+ "137 NaT NaN NaN NaN \n",
815
+ "138 NaT NaN NaN NaN \n",
816
+ "139 NaT NaN NaN NaN \n",
817
+ "140 NaT NaN NaN NaN \n",
818
+ "\n",
819
+ " Neutral Prediction Bullish Prediction Prediction (All News) \n",
820
+ "0 0.183341 0.748952 Bullish \n",
821
+ "1 0.137436 0.672831 Bullish \n",
822
+ "2 0.096258 0.727852 Bullish \n",
823
+ "3 0.209326 0.750482 Bullish \n",
824
+ "4 NaN NaN NaN \n",
825
+ ".. ... ... ... \n",
826
+ "136 NaN NaN NaN \n",
827
+ "137 NaN NaN NaN \n",
828
+ "138 NaN NaN NaN \n",
829
+ "139 NaN NaN NaN \n",
830
+ "140 NaN NaN NaN \n",
831
+ "\n",
832
+ "[141 rows x 19 columns]"
833
+ ]
834
+ },
835
+ "execution_count": 1,
836
+ "metadata": {},
837
+ "output_type": "execute_result"
838
+ }
839
+ ],
840
+ "source": [
841
+ "import pandas as pd\n",
842
+ "eval = pd.read_excel('evaluation.xlsx')\n",
843
+ "eval"
844
+ ]
845
+ },
846
+ {
847
+ "cell_type": "code",
848
+ "execution_count": null,
849
+ "metadata": {},
850
+ "outputs": [],
851
+ "source": []
852
+ },
853
+ {
854
+ "cell_type": "code",
855
+ "execution_count": 16,
856
+ "metadata": {},
857
+ "outputs": [
858
+ {
859
+ "name": "stdout",
860
+ "output_type": "stream",
861
+ "text": [
862
+ "Bearish prediction 0.5093043801287273\n",
863
+ " Neutral prediction 0.24558571118013772\n",
864
+ " Bullish prediction 0.7223830104380737\n"
865
+ ]
866
+ }
867
+ ],
868
+ "source": [
869
+ "test = eval[(eval['topic_prediction']=='Macroeconomic & Geopolitics') | (eval['topic_prediction']=='Crude Oil')]\n",
870
+ "test = test[test['updatedDate'].str.contains('2024-08-08')]\n",
871
+ "print(f'Bearish prediction {test.negative_score.mean()}\\n Neutral prediction {test.neutral_score.mean()}\\n Bullish prediction {test.positive_score.mean()}')"
872
+ ]
873
+ },
874
+ {
875
+ "cell_type": "code",
876
+ "execution_count": null,
877
+ "metadata": {},
878
+ "outputs": [],
879
+ "source": []
880
+ }
881
+ ],
882
+ "metadata": {
883
+ "kernelspec": {
884
+ "display_name": "rl",
885
+ "language": "python",
886
+ "name": "python3"
887
+ },
888
+ "language_info": {
889
+ "codemirror_mode": {
890
+ "name": "ipython",
891
+ "version": 3
892
+ },
893
+ "file_extension": ".py",
894
+ "mimetype": "text/x-python",
895
+ "name": "python",
896
+ "nbconvert_exporter": "python",
897
+ "pygments_lexer": "ipython3",
898
+ "version": "3.9.18"
899
+ }
900
+ },
901
+ "nbformat": 4,
902
+ "nbformat_minor": 2
903
+ }