#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 23 09:27:21 2021
@author: benjaminull
"""
import io
from datetime import datetime, timedelta

import numpy as np
import openpyxl
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pandas import ExcelWriter
from selenium import webdriver
def GenerarExcel(ruta_guardado, Pestañas, Data):
    """Write one DataFrame per sheet to a single Excel file.

    Parameters
    ----------
    ruta_guardado : str
        Destination path of the .xlsx file (overwritten if it exists).
    Pestañas : list of str
        Sheet names, one per DataFrame in ``Data``.
    Data : list of pandas.DataFrame
        Frames to write; ``Data[i]`` is written to sheet ``Pestañas[i]``.
    """
    # The previous version pre-built an openpyxl Workbook (with the
    # deprecated get_sheet_by_name/remove_sheet API) and saved it, but
    # ExcelWriter overwrites that file anyway, so the whole step was dead
    # work.  The context manager replaces the deprecated writer.save()
    # and guarantees the file handle is closed even if to_excel raises.
    with ExcelWriter(ruta_guardado) as writer:
        for pestaña, df in zip(Pestañas, Data):
            df.to_excel(writer, sheet_name=pestaña)
def run_data_covid():
    """Build a country-level COVID mobility dataset and save it to disk.

    Combines Google's Global Mobility Report (streamed CSV) with Apple's
    mobility CSV (whose versioned download URL is scraped from the Apple
    mobility page with Selenium), averages both sources into a single
    'Mobility Index', adds regional aggregates, and writes the result to
    'Scheduler/Movilidad_desagrada.xlsx' plus a .npy dict dump.

    Side effects: launches a local Chrome browser, downloads large CSVs
    over the network, prints diagnostics, and writes two files under
    'Scheduler/'.
    """
    # --- Scrape the Apple CSV link; it is versioned, so it must be
    # discovered from the page's rendered (JavaScript-built) HTML. ---
    options = webdriver.ChromeOptions()
    options.binary_location = r'C:/Program Files/Google/Chrome/Application/chrome.exe'
    # NOTE(review): executable_path/chrome_options are deprecated in
    # Selenium 4 — kept as-is for the pinned Selenium 3 environment.
    path_to_chromedriver = r'C:/Users/bullm/larrainvial.com/Equipo Quant - Area Estrategias Cuantitativas 2.0/Codigos\Data Alternativa/Transcripts/chromedriver.exe'
    browser = webdriver.Chrome(executable_path=path_to_chromedriver,
                               chrome_options=options)
    try:
        browser.get('https://covid19.apple.com/mobility')
        soup = BeautifulSoup(browser.page_source, "html.parser")
        # The second <a> on the page points at the CSV; its href is the
        # first quoted attribute in the tag's string form.
        link = str(soup.find_all("a")[1]).split('"')[1]
    finally:
        # Fix: the original never closed the browser, leaking one Chrome
        # process per run.
        browser.quit()

    # --- Google mobility report: stream in 150k-row chunks and stop
    # after 65 chunks to bound memory use (the source file keeps
    # growing over time). ---
    data_agg = pd.DataFrame()
    for n_chunk, chunk in enumerate(pd.read_csv(
            'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv',
            usecols=['country_region', 'date',
                     'retail_and_recreation_percent_change_from_baseline',
                     'grocery_and_pharmacy_percent_change_from_baseline',
                     'parks_percent_change_from_baseline',
                     'transit_stations_percent_change_from_baseline',
                     'workplaces_percent_change_from_baseline'],
            dtype={'workplaces_percent_change_from_baseline': 'float32',
                   'parks_percent_change_from_baseline': 'float32',
                   'retail_and_recreation_percent_change_from_baseline': 'float32',
                   'transit_stations_percent_change_from_baseline': 'float32'},
            chunksize=150000)):
        if n_chunk == 65:
            break
        data_agg = pd.concat([data_agg, chunk], ignore_index=True)
    data_agg.info(memory_usage="deep")
    # Average sub-national rows up to one row per (country, date).
    data_agg.set_index(['country_region', 'date'], inplace=True)
    data_agg = data_agg.groupby(level=[0, 1]).mean()
    data_agg.columns = data_agg.columns.str.replace(
        '_percent_change_from_baseline', '_google')

    # --- Apple mobility CSV (link scraped above). ---
    confirmed_content = requests.get(link).content
    # NOTE(review): error_bad_lines was removed in pandas 2.0; switch to
    # on_bad_lines='skip' when the pinned pandas is upgraded.
    data_app = pd.read_csv(io.StringIO(confirmed_content.decode('utf-8')),
                           error_bad_lines=False)
    data_app.info(memory_usage="deep")
    # Keep only country-level rows and pivot to a (country, date) index
    # with one column per transportation type.  Apple's baseline is 100,
    # so subtract 100 to match Google's change-from-baseline scale.
    data_app = data_app.loc[data_app['geo_type'] == 'country/region']
    data_app = data_app.drop(columns=['geo_type', 'country',
                                      'alternative_name', 'sub-region'])
    data_app = data_app.set_index(['region', 'transportation_type']).stack()
    data_app = data_app.unstack(level='transportation_type') - 100
    data_app.index.names = data_agg.index.names
    data_agg = data_agg.join(data_app)
    print(data_app.columns)

    # Combined index = simple average of all eight mobility series.
    mob_idx_cols = ['retail_and_recreation_google', 'grocery_and_pharmacy_google',
                    'parks_google', 'transit_stations_google',
                    'workplaces_google', 'driving', 'transit', 'walking']
    data_agg['Mobility Index'] = data_agg[mob_idx_cols].mean(1)

    # Regional aggregates = unweighted mean over member countries.
    regiones = {
        'Latam': ['Argentina', 'Brazil', 'Chile', 'Colombia',
                  'Mexico', 'Peru'],
        'Europa': ['Italy', 'Spain', 'Germany', 'United Kingdom', 'France'],
        'Asia Emergente': ['South Korea', 'Taiwan', 'Hong Kong', 'India',
                           'Thailand', 'Indonesia'],
        'USA': ['United States'],
    }
    data_dict = {}
    for col in data_agg.columns:
        # Countries as columns; 7-day rolling mean (min 3 observations)
        # smooths day-of-week effects.
        df = data_agg[col].unstack().T.rolling(7, 3).mean()
        for region, paises in regiones.items():
            df[region] = df[paises].mean(1)
        data_dict[col] = df
    GenerarExcel("Scheduler/Movilidad_desagrada.xlsx", list(data_dict.keys()),
                 list(data_dict.values()))
    np.save('Scheduler/dict_movilidad.npy', data_dict)