PortalLVAM / apps /Google_Trends.py
bullm's picture
ldsakjl
025632f
from modules import tables
from google_tools import trends as gtrends
import pandas as pd
import numpy as np
from datetime import timedelta, date
from statsmodels.tsa.seasonal import seasonal_decompose
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import streamlit as st
import io
import boto3
import openpyxl
key ='AKIARYMZ4J2YQDB66VX4'
secret_key = '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'
bucket='portallvam'
path ='Momentum.xlsx'
def save_s3(key, secret_key, bucket, df, path):
with io.BytesIO() as output:
with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
for industry in df.keys():
df[industry].to_excel(writer, sheet_name=industry)
data = output.getvalue()
s3 = boto3.resource('s3', aws_access_key_id=key, aws_secret_access_key=secret_key)
s3.Bucket(bucket).put_object(Key=path, Body=data)
def read_excel_s3(key, secret_key, bucket, path):
s3_client = boto3.client('s3', aws_access_key_id=key, aws_secret_access_key=secret_key)
response = s3_client.get_object(Bucket=bucket, Key=path)
data = response["Body"].read()
df = pd.read_excel(io.BytesIO(data), sheet_name=None, index_col='Unnamed: 0.1')
return df
def generar_excel(ruta_guardado, Pestanas, Data):
wb = openpyxl.Workbook()
writer = pd.ExcelWriter(ruta_guardado)
for pestana in Pestanas:
wb.create_sheet(pestana)
std = wb.get_sheet_by_name('Sheet')
wb.remove_sheet(std)
wb.save(ruta_guardado)
for i, pestana in enumerate(Pestanas):
if pestana=='Real Estate Management & Development-CL':
pestana = 'Real Estate-CL'
Data['Real Estate Management & Development-CL'].to_excel(writer, sheet_name=pestana)
elif pestana=='Real Estate Management & Development-BR':
pestana = 'Real Estate-BR'
Data['Real Estate Management & Development-BR'].to_excel(writer, sheet_name=pestana)
else:
Data[pestana].to_excel(writer, sheet_name=Pestanas[i])
writer.save()
def colores_corporativos(colors=None):
color_dict = {'red': (204, 0, 51),
'light_blue': (110, 162, 201),
'light_gray': (135, 146, 158),
'grey': (105, 105, 105),
'yellow': (195, 195, 9),
'dark_purple': (119, 28, 95),
'blue': (42, 83, 113),
'purple': (159, 37, 127),
'light_yellow': (252, 252, 196),
'light_green': (122, 178, 153),
'gray': (66, 74, 82)}
for key in color_dict:
color_dict[key] = tuple(v/255 for v in color_dict[key])
if colors is None:
return color_dict
else:
aux = {col: color_dict[col] for col in colors}
return aux
corp_colors = list(colores_corporativos().values())
colors2 = []
for i in range(len(corp_colors)):
colors2.append("rgb" + str(corp_colors[i]))
company_db = pd.read_excel('Data/Company_Base_Definitivo.xlsx', sheet_name='Compilado')
id_to_ticker = {str(row['ID_Quant']): str(row['Ticker Bloomberg']).split()[0] for i, row in company_db.iterrows()}
countries_dict = {'BR': 'Brazil', 'CL': 'Chile', 'US': 'Brazil',
'US-Disease': 'Brazil'}
@st.cache(suppress_st_warning=True)
def data_request(countries, start, currency='USD'):
close_price = {'Brazil': [],
'Chile': []}
market_cap = {'Brazil': [],
'Chile': []}
for c in countries:
close_price[c] = tables.EquityMaster(field='IQ_CLOSEPRICE_ADJ', currency=currency, country=c).query(
rename=['asset'], start=start, expand=True)
market_cap[c] = tables.EquityMaster(field='IQ_MARKETCAP', currency=currency, country=c).query(
start=start, rename=['asset'], expand=True)
close_price[c] = close_price[c].loc[:, close_price[c].columns.isin(id_to_ticker.keys())]
close_price[c].columns = [id_to_ticker[col] for col in close_price[c].columns]
market_cap[c] = market_cap[c].loc[:, market_cap[c].columns.isin(id_to_ticker.keys())]
market_cap[c].columns = [id_to_ticker[col] for col in market_cap[c].columns]
return [close_price, market_cap]
@st.cache(suppress_st_warning=True)
def trends_request(keywords, today):
trends_frames_dict = {}
for sector, values in keywords.items():
if not (sector in ['Restaurantes']):
trends_frames_dict[sector] = {}
print('Buscando para ' + sector)
for country_name in values.columns:
words = values[country_name].dropna()
if '-' in country_name:
fixed_country_name = country_name.split('-')[0].strip()
else:
fixed_country_name = country_name
words_index = pd.DataFrame()
for word in words:
new_data = gtrends.keyword_trend(word, fixed_country_name, end_date=today)
if new_data is not None:
words_index = pd.concat([words_index,
new_data],
axis=1)
else:
print('No se encuentra data para ' + word)
trends_frames_dict[sector][country_name] = words_index
trends_frames_dict[sector][country_name].index.name = None
return trends_frames_dict
def trends_frames_excel(dicc):
sheets_cl = []
sheets_br = []
for key_1 in dicc.keys():
for key_2 in dicc[key_1].keys():
if key_2=='CL':
sheets_cl .append(key_1 + '-' + key_2)
else:
sheets_br.append(key_1 + '-' + key_2)
trends_frames_dict_cl = {}
trends_frames_dict_br = {}
for key_1 in dicc.keys():
for key_2 in dicc[key_1].keys():
if key_2=='CL':
trends_frames_dict_cl[key_1 + '-'+ key_2] = dicc[key_1][key_2]
elif key_2=='BR':
trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2]
elif key_2=='US' or key_2=='US-Disease':
trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2]
generar_excel('Data/GT_CL.xlsx', sheets_cl, trends_frames_dict_cl)
df_cl = pd.read_excel('Data/GT_CL.xlsx', sheet_name=None)
st.write(df_cl)
save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_cl, path='GT_CL.xlsx')
generar_excel('Data/GT_BR.xlsx', sheets_br, trends_frames_dict_br)
df_br = pd.read_excel('Data/GT_BR.xlsx', sheet_name=None)
save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_br, path='GT_BR.xlsx')
def read_trends_frames(country):
if country=='CL':
return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_CL.xlsx')
elif country=='BR':
return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_BR.xlsx')
def report():
form = st.form('Report')
start_date = str(date.today() - timedelta(5 * 365))
select_countries = form.multiselect('驴Qu茅 pa铆s(es) desea visualizar?', ['Todos', 'Chile', 'Brazil'])
if 'Todos' in select_countries:
select_countries = ['Chile', 'Brazil']
update_data = form.form_submit_button("Actualizar Datos")
accept = form.form_submit_button('Visualizar')
col1, col2 = st.columns(2)
if update_data:
xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx')
industry_filter = ['Pesca', 'Agricola', 'Financials-RP']
keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names
if sheet not in industry_filter}
xls.close()
del xls
# Arreglamos una llave porque una hoja de excel alcanza el m谩ximo de caracteres posible para un nombre.
new_key = "Real Estate Management & Development"
old_key = "Real Estate Management & Develo"
keywords_dict[new_key] = keywords_dict.pop(old_key)
trends_dict = trends_request(keywords_dict, date.today())
trends_frames_excel(trends_dict)
ud = pd.read_excel('Data/update_data.xlsx')
ud = ud[ud['View'] != 'Google Trends']
today = date.today().strftime('%d-%m-%Y')
ud = ud.append({"View": 'Google Trends',
"Last_Update": today}, ignore_index=True)
ud.to_excel('Data/update_data.xlsx', index=False)
if accept:
close_price_dict, market_cap_dict = data_request(select_countries, start_date)
ew_index = {}
mw_index = {}
country_index = {}
if select_countries == ['Brazil']:
dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index)
.union(set(close_price_dict['Brazil'].index))))}
elif select_countries == ['Chile']:
dates = {'Chile': sorted(list(set(market_cap_dict['Chile'].index)
.union(set(close_price_dict['Chile'].index))))}
else:
dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index)
.union(set(close_price_dict['Brazil'].index)))),
'Chile': sorted(list(set(market_cap_dict['Chile'].index)
.union(set(close_price_dict['Chile'].index))))}
for country in select_countries:
mkt = market_cap_dict[country]
cp = close_price_dict[country]
w = mkt.div(mkt.sum(1).values, axis=0)
rets = cp.pct_change()
country_index[country] = pd.DataFrame({'MW': (w * rets).sum(1),
'EW': rets.mean(1)}).fillna(0)
industries_1 = np.unique(company_db[['LV1']].values)
industries_2 = np.unique(company_db[['LV2']].values)
industries = np.unique(np.concatenate([industries_1, industries_2]))
df_mw_index = pd.DataFrame(columns=industries, index=dates[country])
df_ew_index = pd.DataFrame(columns=industries, index=dates[country])
for industry in industries:
industry = str(industry)
mc = mkt.loc[:, mkt.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])]
prices = cp.loc[:, cp.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])]
w = mc.div(mc.sum(1).values, axis=0)
rets = prices.pct_change()
df_mw_index[industry] = (w * rets).sum(1)
df_ew_index[industry] = rets.mean(1)
mw_index[country] = df_mw_index.fillna(0)
ew_index[country] = df_ew_index.fillna(0)
xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx')
industry_filter = ['Pesca', 'Agricola', 'Financials-RP', 'Agriculture']
keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names
if sheet not in industry_filter}
xls.close()
del xls
new_key = "Real Estate Management & Development"
old_key = "Real Estate Management & Develo"
keywords_dict[new_key] = keywords_dict.pop(old_key)
trends_frames = {}
trends_frames_cl = read_trends_frames('CL')
trends_frames_br = read_trends_frames('BR')
for key_cl in trends_frames_cl.keys():
trends_frames_cl[key_cl] = trends_frames_cl[key_cl].drop(columns='Unnamed: 0')
for key_br in trends_frames_br.keys():
trends_frames_br[key_br] = trends_frames_br[key_br].drop(columns='Unnamed: 0')
for industry in keywords_dict.keys():
if not industry=='Restaurantes':
countries_in_industry = keywords_dict[industry].columns
trends_frames[industry] = {}
for c in countries_in_industry:
if c=='CL':
if industry == 'Real Estate Management & Development':
index = trends_frames_cl['Real Estate-CL'].index
trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl['Real Estate-CL'].columns,
index=index)
else:
index = trends_frames_cl[industry+'-CL'].index
trends_frames_cl[industry+'-CL'] = trends_frames_cl[industry+'-CL'].loc[:, trends_frames_cl[industry+'-CL'].columns.notnull()]
trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl[industry+'-CL'].columns,
index=index)
elif c=='BR':
if industry == 'Real Estate Management & Development':
index = trends_frames_br['Real Estate-BR'].index
trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br['Real Estate-BR'].columns,
index=index)
else:
index = trends_frames_br[industry + '-BR'].index
trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br[industry+'-BR'].columns,
index=index)
if 'CL' in countries_in_industry:
if industry == 'Real Estate Management & Development':
for col_cl in trends_frames_cl['Real Estate-CL'].columns:
if col_cl in keywords_dict[industry]['CL'].values:
trends_frames[industry]['CL'][col_cl] = trends_frames_cl['Real Estate-CL'][col_cl].dropna()
else:
for col_cl in trends_frames_cl[industry+'-CL'].columns:
if col_cl in keywords_dict[industry]['CL'].values:
trends_frames[industry]['CL'][col_cl] = trends_frames_cl[industry+'-CL'][col_cl].dropna()
if 'BR' in countries_in_industry:
if industry == 'Real Estate Management & Development':
for col_br in trends_frames_br['Real Estate-BR'].columns:
if col_br in keywords_dict[industry]['BR'].values:
trends_frames[industry]['BR'][col_br] = trends_frames_br['Real Estate-BR'][col_br].dropna()
else:
for col_br in trends_frames_br[industry+'-BR'].columns:
if col_br in keywords_dict[industry]['BR'].values:
trends_frames[industry]['BR'][col_br] = trends_frames_br[industry+'-BR'][col_br].dropna()
deseason = True
n_words = 5
for industry in keywords_dict.keys():
if not industry == 'Restaurantes':
countries_in_industry = keywords_dict[industry].columns
for c in countries_in_industry:
trends_frames[industry][c] = trends_frames[industry][c].loc[:, trends_frames[industry][c].columns.notnull()]
summary = pd.DataFrame()
fig1 = make_subplots(rows=2, cols=1,
subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'],
horizontal_spacing=0.6, )
fig2 = make_subplots(rows=2, cols=1,
subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'],
horizontal_spacing=0.6)
for industry, dict_ in trends_frames.items():
for country, df_ in dict_.items():
if deseason:
df_ = pd.DataFrame({col: df_[col] -
seasonal_decompose(df_[col], period=8).seasonal
for col in df_.columns})
summary[f'{industry}-{country}'] = df_.mean(1)
summary = (summary - summary.mean()) / summary.std()
delta_w = summary.diff(1).iloc[-1].sort_values(ascending=True)
delta_m = summary.diff(4).iloc[-1].sort_values(ascending=True)
delta_3m = summary.diff(12).iloc[-1].sort_values(ascending=True)
delta_ytd = summary.resample('Y').last().diff().iloc[-1].sort_values(ascending=True)
fig1.add_trace(go.Bar(x=delta_w.array, y=delta_w.index, orientation='h', marker_color=colors2[2],
showlegend=False), row=1, col=1)
fig2.add_trace(go.Bar(x=delta_m.array, y=delta_m.index, orientation='h', marker_color=colors2[2],
showlegend=False), row=1, col=1)
fig1.add_trace(go.Bar(x=delta_3m.array, y=delta_3m.index, orientation='h', marker_color=colors2[2],
showlegend=False), row=2, col=1)
fig2.add_trace(go.Bar(x=delta_ytd.array, y=delta_ytd.index, orientation='h', marker_color=colors2[2],
showlegend=False), row=2, col=1)
fig1.update_layout(title_text='Cambios en las B煤squedas', margin_b=0, margin_t=50, margin_r=0, margin_l=0)
fig2.update_layout(margin_b=0, margin_t=50, margin_r=0, margin_l=0)
col1.plotly_chart(fig1, use_container_width=True)
col2.plotly_chart(fig2, use_container_width=True)
for industry in trends_frames:
for i, (country, data) in enumerate(trends_frames[industry].items()):
if countries_dict[country] in select_countries:
fig_indices1 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}],
[{"secondary_y": False}]],
subplot_titles=['GT (zscore) vs Spread Hist贸rico',
'Variaci贸n YoY GT Hist贸rico'],
horizontal_spacing=0.)
fig_indices2 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}],
[{"secondary_y": False}]],
subplot_titles=['GT (zscore) vs Spread 脷ltimo A帽o',
'Variaci贸n YoY GT 脷ltimo a帽o'],
horizontal_spacing=0.3)
aux_df = summary[f'{industry}-{country}']
aux_df.index.name = ''
mm_year = aux_df.rolling(52).mean()
mm_half = aux_df.rolling(26).mean()
mm_quarter = aux_df.rolling(13).mean()
mm_month = aux_df.rolling(4).mean()
mm = pd.concat([mm_year, mm_half, mm_quarter, mm_month], axis=1)
mm.columns = ['1Y', '6M', '3M', '1M']
if deseason:
p = '3M'
else:
p = '1Y'
fig_indices1.add_trace(go.Scatter(x=mm[p].index, y=mm[p].array, line=dict(color=colors2[0]),
showlegend=True, name=f'{p} MM GT Index'),
secondary_y=False, row=1, col=1)
fig_indices1.update_layout(title_text=f'{industry} - {country}')
fig_indices2.add_trace(
go.Scatter(x=mm[p].iloc[-52:].index, y=mm[p].iloc[-52:].array, line=dict(color=colors2[0]),
showlegend=False, name=f'{p} MM GT Index'),
secondary_y=False, row=1, col=1)
mm_4w = data.mean(1).rolling(4).mean()
yoy = mm_4w.pct_change(52)
aux2 = pd.concat([yoy], axis=1)
aux2.columns = ['YoY']
aux2.index.name = ''
fig_indices1.add_trace(go.Bar(x=aux2.dropna().index, y=aux2.dropna()['YoY'],
marker_color=colors2[1], showlegend=False), row=2, col=1)
fig_indices2.add_trace(go.Bar(x=aux2.dropna().iloc[-52:].index, y=aux2.dropna()['YoY'].iloc[-52:].array,
marker_color=colors2[1], showlegend=False), row=2, col=1)
if country == 'US' and industry == 'Pesca':
country_ = 'Chile'
else:
country_ = countries_dict[country]
spread_mw = (mw_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) -
country_index[country_]['MW'].rolling(52).apply(lambda x: (1 + x).prod()))
spread_ew = (ew_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) -
country_index[country_]['EW'].rolling(52).apply(lambda x: (1 + x).prod()))
spread = pd.DataFrame({'EW': spread_ew, 'MW': spread_mw})
fig_indices1.add_trace(go.Scatter(x=spread['MW'].dropna().index, y=spread['MW'].dropna().array,
name='Spread MW', line=dict(color=colors2[3])),
secondary_y=True, row=1, col=1)
fig_indices2.add_trace(
go.Scatter(x=spread['MW'].iloc[-260:].dropna().index, y=spread['MW'].iloc[-260:].dropna().array,
name='Spread MW', line=dict(color=colors2[3])),
secondary_y=True, row=1, col=1)
fig_indices1.update_xaxes(showticklabels=False)
fig_indices2.update_xaxes(showticklabels=False)
fig_indices1.layout.update(xaxis_rangeslider_visible=False, margin_b=20,
margin_r=20, margin_l=20,
legend=dict(orientation="h",
yanchor="top",
y=0.6,
xanchor="right",
x=1))
fig_indices2.layout.update(xaxis_rangeslider_visible=False,
margin_b=20,
margin_r=20, margin_l=20,
legend=dict(orientation="h",
yanchor="top",
y=0.6,
xanchor="right",
x=1))
fig_indices1.update_xaxes(showticklabels=True, row=2,
col=1)
fig_indices2.update_xaxes(showticklabels=True, row=2,
col=1)
fig_indices1.update_yaxes(tickformat=',.0%', row=2, col=1)
fig_indices2.update_yaxes(tickformat=',.0%', row=2, col=1)
if deseason:
df1 = pd.DataFrame({col: data[col] -
seasonal_decompose(data[col]).seasonal
for col in data.columns})
else:
df1 = data
# Top word's table plot
last_week = df1.iloc[-1].sort_values(ascending=False)[:n_words] / 100
all_time = df1.mean().sort_values(ascending=False)[:n_words] / 100
fig_W = make_subplots(subplot_titles=['Top Words en ' + f'{industry} - {country}'])
table = pd.concat([pd.Series(last_week.index),
pd.Series(all_time.index)], axis=1)
table.columns = ['Top 1W', 'Top 5Y']
fig_W.add_trace(go.Table(header=dict(values=table.columns),
cells=dict(values=[table['Top 1W'].values, table['Top 5Y'].values])))
fig_W.update_layout(margin_b=0, margin_t=50,
margin_r=0, margin_l=0,
height=200)
fig_indices1.update_layout(margin_b=0, margin_t=50,
margin_r=0, margin_l=0,
height=600)
fig_indices2.update_layout(margin_b=0, margin_t=50,
margin_r=0, margin_l=0,
height=600)
col1.plotly_chart(fig_indices1, use_container_width=True)
col2.plotly_chart(fig_indices2, use_container_width=True)
fig_W.update_layout(margin_b=0, margin_t=30, margin_r=10, margin_l=0)
st.plotly_chart(fig_W, use_container_width=True)