Spaces:

bullm
/

PortalLVAM

Configuration error

App Files Files Community

PortalLVAM / apps /Google_Trends.py

bullm

ldsakjl

025632f over 2 years ago

raw

history blame contribute delete

25.5 kB

	from modules import tables
	from google_tools import trends as gtrends
	import pandas as pd
	import numpy as np
	from datetime import timedelta, date
	from statsmodels.tsa.seasonal import seasonal_decompose
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots
	import streamlit as st
	import io
	import boto3
	import openpyxl


	key ='AKIARYMZ4J2YQDB66VX4'
	secret_key = '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'
	bucket='portallvam'
	path ='Momentum.xlsx'


	def save_s3(key, secret_key, bucket, df, path):
	with io.BytesIO() as output:
	with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
	for industry in df.keys():
	df[industry].to_excel(writer, sheet_name=industry)
	data = output.getvalue()
	s3 = boto3.resource('s3', aws_access_key_id=key, aws_secret_access_key=secret_key)
	s3.Bucket(bucket).put_object(Key=path, Body=data)


	def read_excel_s3(key, secret_key, bucket, path):
	s3_client = boto3.client('s3', aws_access_key_id=key, aws_secret_access_key=secret_key)
	response = s3_client.get_object(Bucket=bucket, Key=path)
	data = response["Body"].read()
	df = pd.read_excel(io.BytesIO(data), sheet_name=None, index_col='Unnamed: 0.1')
	return df


	def generar_excel(ruta_guardado, Pestanas, Data):
	wb = openpyxl.Workbook()
	writer = pd.ExcelWriter(ruta_guardado)
	for pestana in Pestanas:
	wb.create_sheet(pestana)
	std = wb.get_sheet_by_name('Sheet')
	wb.remove_sheet(std)
	wb.save(ruta_guardado)
	for i, pestana in enumerate(Pestanas):
	if pestana=='Real Estate Management & Development-CL':
	pestana = 'Real Estate-CL'
	Data['Real Estate Management & Development-CL'].to_excel(writer, sheet_name=pestana)
	elif pestana=='Real Estate Management & Development-BR':
	pestana = 'Real Estate-BR'
	Data['Real Estate Management & Development-BR'].to_excel(writer, sheet_name=pestana)
	else:
	Data[pestana].to_excel(writer, sheet_name=Pestanas[i])
	writer.save()


	def colores_corporativos(colors=None):

	color_dict = {'red': (204, 0, 51),
	'light_blue': (110, 162, 201),
	'light_gray': (135, 146, 158),
	'grey': (105, 105, 105),
	'yellow': (195, 195, 9),
	'dark_purple': (119, 28, 95),
	'blue': (42, 83, 113),
	'purple': (159, 37, 127),
	'light_yellow': (252, 252, 196),
	'light_green': (122, 178, 153),
	'gray': (66, 74, 82)}

	for key in color_dict:
	color_dict[key] = tuple(v/255 for v in color_dict[key])

	if colors is None:
	return color_dict
	else:
	aux = {col: color_dict[col] for col in colors}
	return aux


	corp_colors = list(colores_corporativos().values())
	colors2 = []
	for i in range(len(corp_colors)):
	colors2.append("rgb" + str(corp_colors[i]))


	company_db = pd.read_excel('Data/Company_Base_Definitivo.xlsx', sheet_name='Compilado')
	id_to_ticker = {str(row['ID_Quant']): str(row['Ticker Bloomberg']).split()[0] for i, row in company_db.iterrows()}

	countries_dict = {'BR': 'Brazil', 'CL': 'Chile', 'US': 'Brazil',
	'US-Disease': 'Brazil'}


	@st.cache(suppress_st_warning=True)
	def data_request(countries, start, currency='USD'):
	close_price = {'Brazil': [],
	'Chile': []}
	market_cap = {'Brazil': [],
	'Chile': []}
	for c in countries:
	close_price[c] = tables.EquityMaster(field='IQ_CLOSEPRICE_ADJ', currency=currency, country=c).query(
	rename=['asset'], start=start, expand=True)
	market_cap[c] = tables.EquityMaster(field='IQ_MARKETCAP', currency=currency, country=c).query(
	start=start, rename=['asset'], expand=True)

	close_price[c] = close_price[c].loc[:, close_price[c].columns.isin(id_to_ticker.keys())]
	close_price[c].columns = [id_to_ticker[col] for col in close_price[c].columns]

	market_cap[c] = market_cap[c].loc[:, market_cap[c].columns.isin(id_to_ticker.keys())]
	market_cap[c].columns = [id_to_ticker[col] for col in market_cap[c].columns]
	return [close_price, market_cap]


	@st.cache(suppress_st_warning=True)
	def trends_request(keywords, today):
	trends_frames_dict = {}
	for sector, values in keywords.items():
	if not (sector in ['Restaurantes']):
	trends_frames_dict[sector] = {}
	print('Buscando para ' + sector)
	for country_name in values.columns:
	words = values[country_name].dropna()
	if '-' in country_name:
	fixed_country_name = country_name.split('-')[0].strip()
	else:
	fixed_country_name = country_name
	words_index = pd.DataFrame()
	for word in words:
	new_data = gtrends.keyword_trend(word, fixed_country_name, end_date=today)
	if new_data is not None:
	words_index = pd.concat([words_index,
	new_data],
	axis=1)
	else:
	print('No se encuentra data para ' + word)
	trends_frames_dict[sector][country_name] = words_index
	trends_frames_dict[sector][country_name].index.name = None
	return trends_frames_dict


	def trends_frames_excel(dicc):
	sheets_cl = []
	sheets_br = []
	for key_1 in dicc.keys():
	for key_2 in dicc[key_1].keys():
	if key_2=='CL':
	sheets_cl .append(key_1 + '-' + key_2)
	else:
	sheets_br.append(key_1 + '-' + key_2)
	trends_frames_dict_cl = {}
	trends_frames_dict_br = {}
	for key_1 in dicc.keys():
	for key_2 in dicc[key_1].keys():
	if key_2=='CL':
	trends_frames_dict_cl[key_1 + '-'+ key_2] = dicc[key_1][key_2]
	elif key_2=='BR':
	trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2]
	elif key_2=='US' or key_2=='US-Disease':
	trends_frames_dict_br[key_1 + '-' + key_2] = dicc[key_1][key_2]

	generar_excel('Data/GT_CL.xlsx', sheets_cl, trends_frames_dict_cl)
	df_cl = pd.read_excel('Data/GT_CL.xlsx', sheet_name=None)
	st.write(df_cl)
	save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_cl, path='GT_CL.xlsx')

	generar_excel('Data/GT_BR.xlsx', sheets_br, trends_frames_dict_br)
	df_br = pd.read_excel('Data/GT_BR.xlsx', sheet_name=None)
	save_s3(key=key, secret_key=secret_key, bucket=bucket, df=df_br, path='GT_BR.xlsx')


	def read_trends_frames(country):
	if country=='CL':
	return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_CL.xlsx')
	elif country=='BR':
	return read_excel_s3(key=key, secret_key=secret_key, bucket=bucket, path='GT_BR.xlsx')


	def report():
	form = st.form('Report')
	start_date = str(date.today() - timedelta(5 * 365))
	select_countries = form.multiselect('¿Qué país(es) desea visualizar?', ['Todos', 'Chile', 'Brazil'])
	if 'Todos' in select_countries:
	select_countries = ['Chile', 'Brazil']
	update_data = form.form_submit_button("Actualizar Datos")
	accept = form.form_submit_button('Visualizar')
	col1, col2 = st.columns(2)

	if update_data:
	xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx')
	industry_filter = ['Pesca', 'Agricola', 'Financials-RP']
	keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names
	if sheet not in industry_filter}
	xls.close()
	del xls

	# Arreglamos una llave porque una hoja de excel alcanza el máximo de caracteres posible para un nombre.
	new_key = "Real Estate Management & Development"
	old_key = "Real Estate Management & Develo"
	keywords_dict[new_key] = keywords_dict.pop(old_key)

	trends_dict = trends_request(keywords_dict, date.today())
	trends_frames_excel(trends_dict)
	ud = pd.read_excel('Data/update_data.xlsx')
	ud = ud[ud['View'] != 'Google Trends']
	today = date.today().strftime('%d-%m-%Y')
	ud = ud.append({"View": 'Google Trends',
	"Last_Update": today}, ignore_index=True)
	ud.to_excel('Data/update_data.xlsx', index=False)

	if accept:
	close_price_dict, market_cap_dict = data_request(select_countries, start_date)

	ew_index = {}
	mw_index = {}
	country_index = {}

	if select_countries == ['Brazil']:
	dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index)
	.union(set(close_price_dict['Brazil'].index))))}
	elif select_countries == ['Chile']:
	dates = {'Chile': sorted(list(set(market_cap_dict['Chile'].index)
	.union(set(close_price_dict['Chile'].index))))}
	else:
	dates = {'Brazil': sorted(list(set(market_cap_dict['Brazil'].index)
	.union(set(close_price_dict['Brazil'].index)))),
	'Chile': sorted(list(set(market_cap_dict['Chile'].index)
	.union(set(close_price_dict['Chile'].index))))}

	for country in select_countries:
	mkt = market_cap_dict[country]
	cp = close_price_dict[country]
	w = mkt.div(mkt.sum(1).values, axis=0)
	rets = cp.pct_change()
	country_index[country] = pd.DataFrame({'MW': (w * rets).sum(1),
	'EW': rets.mean(1)}).fillna(0)
	industries_1 = np.unique(company_db[['LV1']].values)
	industries_2 = np.unique(company_db[['LV2']].values)
	industries = np.unique(np.concatenate([industries_1, industries_2]))
	df_mw_index = pd.DataFrame(columns=industries, index=dates[country])
	df_ew_index = pd.DataFrame(columns=industries, index=dates[country])
	for industry in industries:
	industry = str(industry)
	mc = mkt.loc[:, mkt.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])]
	prices = cp.loc[:, cp.columns.isin(company_db[company_db['LV1'] == industry]['Ticker'])]

	w = mc.div(mc.sum(1).values, axis=0)
	rets = prices.pct_change()
	df_mw_index[industry] = (w * rets).sum(1)
	df_ew_index[industry] = rets.mean(1)
	mw_index[country] = df_mw_index.fillna(0)
	ew_index[country] = df_ew_index.fillna(0)

	xls = pd.ExcelFile('Data/keywords_definitivas_mongo.xlsx')
	industry_filter = ['Pesca', 'Agricola', 'Financials-RP', 'Agriculture']
	keywords_dict = {sheet: xls.parse(sheet) for sheet in xls.sheet_names
	if sheet not in industry_filter}
	xls.close()
	del xls

	new_key = "Real Estate Management & Development"
	old_key = "Real Estate Management & Develo"
	keywords_dict[new_key] = keywords_dict.pop(old_key)

	trends_frames = {}
	trends_frames_cl = read_trends_frames('CL')
	trends_frames_br = read_trends_frames('BR')

	for key_cl in trends_frames_cl.keys():
	trends_frames_cl[key_cl] = trends_frames_cl[key_cl].drop(columns='Unnamed: 0')

	for key_br in trends_frames_br.keys():
	trends_frames_br[key_br] = trends_frames_br[key_br].drop(columns='Unnamed: 0')

	for industry in keywords_dict.keys():
	if not industry=='Restaurantes':
	countries_in_industry = keywords_dict[industry].columns
	trends_frames[industry] = {}
	for c in countries_in_industry:
	if c=='CL':
	if industry == 'Real Estate Management & Development':
	index = trends_frames_cl['Real Estate-CL'].index
	trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl['Real Estate-CL'].columns,
	index=index)
	else:
	index = trends_frames_cl[industry+'-CL'].index
	trends_frames_cl[industry+'-CL'] = trends_frames_cl[industry+'-CL'].loc[:, trends_frames_cl[industry+'-CL'].columns.notnull()]
	trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_cl[industry+'-CL'].columns,
	index=index)
	elif c=='BR':
	if industry == 'Real Estate Management & Development':
	index = trends_frames_br['Real Estate-BR'].index
	trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br['Real Estate-BR'].columns,
	index=index)
	else:
	index = trends_frames_br[industry + '-BR'].index
	trends_frames[industry][c] = pd.DataFrame(columns=trends_frames_br[industry+'-BR'].columns,
	index=index)
	if 'CL' in countries_in_industry:
	if industry == 'Real Estate Management & Development':
	for col_cl in trends_frames_cl['Real Estate-CL'].columns:
	if col_cl in keywords_dict[industry]['CL'].values:
	trends_frames[industry]['CL'][col_cl] = trends_frames_cl['Real Estate-CL'][col_cl].dropna()
	else:
	for col_cl in trends_frames_cl[industry+'-CL'].columns:
	if col_cl in keywords_dict[industry]['CL'].values:
	trends_frames[industry]['CL'][col_cl] = trends_frames_cl[industry+'-CL'][col_cl].dropna()
	if 'BR' in countries_in_industry:
	if industry == 'Real Estate Management & Development':
	for col_br in trends_frames_br['Real Estate-BR'].columns:
	if col_br in keywords_dict[industry]['BR'].values:
	trends_frames[industry]['BR'][col_br] = trends_frames_br['Real Estate-BR'][col_br].dropna()
	else:
	for col_br in trends_frames_br[industry+'-BR'].columns:
	if col_br in keywords_dict[industry]['BR'].values:
	trends_frames[industry]['BR'][col_br] = trends_frames_br[industry+'-BR'][col_br].dropna()
	deseason = True
	n_words = 5
	for industry in keywords_dict.keys():
	if not industry == 'Restaurantes':
	countries_in_industry = keywords_dict[industry].columns
	for c in countries_in_industry:
	trends_frames[industry][c] = trends_frames[industry][c].loc[:, trends_frames[industry][c].columns.notnull()]

	summary = pd.DataFrame()
	fig1 = make_subplots(rows=2, cols=1,
	subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'],
	horizontal_spacing=0.6, )
	fig2 = make_subplots(rows=2, cols=1,
	subplot_titles=['Cambio Semanal', 'Cambio 1 Mes', 'Cambio 3 Meses', 'Cambio YTD'],
	horizontal_spacing=0.6)
	for industry, dict_ in trends_frames.items():
	for country, df_ in dict_.items():
	if deseason:
	df_ = pd.DataFrame({col: df_[col] -
	seasonal_decompose(df_[col], period=8).seasonal
	for col in df_.columns})
	summary[f'{industry}-{country}'] = df_.mean(1)
	summary = (summary - summary.mean()) / summary.std()

	delta_w = summary.diff(1).iloc[-1].sort_values(ascending=True)
	delta_m = summary.diff(4).iloc[-1].sort_values(ascending=True)
	delta_3m = summary.diff(12).iloc[-1].sort_values(ascending=True)
	delta_ytd = summary.resample('Y').last().diff().iloc[-1].sort_values(ascending=True)

	fig1.add_trace(go.Bar(x=delta_w.array, y=delta_w.index, orientation='h', marker_color=colors2[2],
	showlegend=False), row=1, col=1)

	fig2.add_trace(go.Bar(x=delta_m.array, y=delta_m.index, orientation='h', marker_color=colors2[2],
	showlegend=False), row=1, col=1)

	fig1.add_trace(go.Bar(x=delta_3m.array, y=delta_3m.index, orientation='h', marker_color=colors2[2],
	showlegend=False), row=2, col=1)

	fig2.add_trace(go.Bar(x=delta_ytd.array, y=delta_ytd.index, orientation='h', marker_color=colors2[2],
	showlegend=False), row=2, col=1)

	fig1.update_layout(title_text='Cambios en las Búsquedas', margin_b=0, margin_t=50, margin_r=0, margin_l=0)
	fig2.update_layout(margin_b=0, margin_t=50, margin_r=0, margin_l=0)
	col1.plotly_chart(fig1, use_container_width=True)
	col2.plotly_chart(fig2, use_container_width=True)

	for industry in trends_frames:
	for i, (country, data) in enumerate(trends_frames[industry].items()):
	if countries_dict[country] in select_countries:

	fig_indices1 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}],
	[{"secondary_y": False}]],
	subplot_titles=['GT (zscore) vs Spread Histórico',
	'Variación YoY GT Histórico'],
	horizontal_spacing=0.)
	fig_indices2 = make_subplots(rows=2, cols=1, specs=[[{"secondary_y": True}],
	[{"secondary_y": False}]],
	subplot_titles=['GT (zscore) vs Spread Último Año',
	'Variación YoY GT Último año'],
	horizontal_spacing=0.3)

	aux_df = summary[f'{industry}-{country}']
	aux_df.index.name = ''
	mm_year = aux_df.rolling(52).mean()
	mm_half = aux_df.rolling(26).mean()
	mm_quarter = aux_df.rolling(13).mean()
	mm_month = aux_df.rolling(4).mean()
	mm = pd.concat([mm_year, mm_half, mm_quarter, mm_month], axis=1)
	mm.columns = ['1Y', '6M', '3M', '1M']
	if deseason:
	p = '3M'
	else:
	p = '1Y'

	fig_indices1.add_trace(go.Scatter(x=mm[p].index, y=mm[p].array, line=dict(color=colors2[0]),
	showlegend=True, name=f'{p} MM GT Index'),
	secondary_y=False, row=1, col=1)

	fig_indices1.update_layout(title_text=f'{industry} - {country}')
	fig_indices2.add_trace(
	go.Scatter(x=mm[p].iloc[-52:].index, y=mm[p].iloc[-52:].array, line=dict(color=colors2[0]),
	showlegend=False, name=f'{p} MM GT Index'),
	secondary_y=False, row=1, col=1)

	mm_4w = data.mean(1).rolling(4).mean()
	yoy = mm_4w.pct_change(52)
	aux2 = pd.concat([yoy], axis=1)
	aux2.columns = ['YoY']
	aux2.index.name = ''

	fig_indices1.add_trace(go.Bar(x=aux2.dropna().index, y=aux2.dropna()['YoY'],
	marker_color=colors2[1], showlegend=False), row=2, col=1)
	fig_indices2.add_trace(go.Bar(x=aux2.dropna().iloc[-52:].index, y=aux2.dropna()['YoY'].iloc[-52:].array,
	marker_color=colors2[1], showlegend=False), row=2, col=1)

	if country == 'US' and industry == 'Pesca':
	country_ = 'Chile'
	else:
	country_ = countries_dict[country]

	spread_mw = (mw_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) -
	country_index[country_]['MW'].rolling(52).apply(lambda x: (1 + x).prod()))
	spread_ew = (ew_index[country_][industry].rolling(52).apply(lambda x: (1 + x).prod()) -
	country_index[country_]['EW'].rolling(52).apply(lambda x: (1 + x).prod()))

	spread = pd.DataFrame({'EW': spread_ew, 'MW': spread_mw})

	fig_indices1.add_trace(go.Scatter(x=spread['MW'].dropna().index, y=spread['MW'].dropna().array,
	name='Spread MW', line=dict(color=colors2[3])),
	secondary_y=True, row=1, col=1)

	fig_indices2.add_trace(
	go.Scatter(x=spread['MW'].iloc[-260:].dropna().index, y=spread['MW'].iloc[-260:].dropna().array,
	name='Spread MW', line=dict(color=colors2[3])),
	secondary_y=True, row=1, col=1)

	fig_indices1.update_xaxes(showticklabels=False)
	fig_indices2.update_xaxes(showticklabels=False)
	fig_indices1.layout.update(xaxis_rangeslider_visible=False, margin_b=20,
	margin_r=20, margin_l=20,
	legend=dict(orientation="h",
	yanchor="top",
	y=0.6,
	xanchor="right",
	x=1))
	fig_indices2.layout.update(xaxis_rangeslider_visible=False,
	margin_b=20,
	margin_r=20, margin_l=20,
	legend=dict(orientation="h",
	yanchor="top",
	y=0.6,
	xanchor="right",
	x=1))
	fig_indices1.update_xaxes(showticklabels=True, row=2,
	col=1)
	fig_indices2.update_xaxes(showticklabels=True, row=2,
	col=1)

	fig_indices1.update_yaxes(tickformat=',.0%', row=2, col=1)
	fig_indices2.update_yaxes(tickformat=',.0%', row=2, col=1)

	if deseason:
	df1 = pd.DataFrame({col: data[col] -
	seasonal_decompose(data[col]).seasonal
	for col in data.columns})
	else:
	df1 = data

	# Top word's table plot
	last_week = df1.iloc[-1].sort_values(ascending=False)[:n_words] / 100
	all_time = df1.mean().sort_values(ascending=False)[:n_words] / 100

	fig_W = make_subplots(subplot_titles=['Top Words en ' + f'{industry} - {country}'])

	table = pd.concat([pd.Series(last_week.index),
	pd.Series(all_time.index)], axis=1)
	table.columns = ['Top 1W', 'Top 5Y']
	fig_W.add_trace(go.Table(header=dict(values=table.columns),
	cells=dict(values=[table['Top 1W'].values, table['Top 5Y'].values])))
	fig_W.update_layout(margin_b=0, margin_t=50,
	margin_r=0, margin_l=0,
	height=200)
	fig_indices1.update_layout(margin_b=0, margin_t=50,
	margin_r=0, margin_l=0,
	height=600)
	fig_indices2.update_layout(margin_b=0, margin_t=50,
	margin_r=0, margin_l=0,
	height=600)
	col1.plotly_chart(fig_indices1, use_container_width=True)
	col2.plotly_chart(fig_indices2, use_container_width=True)

	fig_W.update_layout(margin_b=0, margin_t=30, margin_r=10, margin_l=0)
	st.plotly_chart(fig_W, use_container_width=True)