# PortalLVAM / Scheduler / Scheduler_Covid.py
# Provenance: uploaded to Hugging Face by user 'bullm' (commit bb112ef, 4.92 kB).
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 23 09:27:21 2021
@author: benjaminull
"""
import pandas as pd
from datetime import datetime
import numpy as np
from datetime import timedelta
import requests
import io
import openpyxl
from pandas import ExcelWriter
import requests
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import requests
def GenerarExcel(ruta_guardado, Pestañas, Data):
    """Write each DataFrame in ``Data`` to its own sheet of one Excel file.

    Parameters
    ----------
    ruta_guardado : str
        Destination path of the .xlsx file.
    Pestañas : list[str]
        Sheet names, paired positionally with the frames in ``Data``.
    Data : list[pandas.DataFrame]
        Frames to write, one per sheet.
    """
    # Fixes over the original implementation:
    #  * The manual openpyxl Workbook / get_sheet_by_name / remove_sheet
    #    dance was redundant (ExcelWriter creates each sheet on to_excel)
    #    and relied on APIs removed in openpyxl 3.1.
    #  * writer.save() was deprecated and later removed in pandas 2.0; the
    #    context manager closes the file even if a write raises.
    #  * Dropped the debug print of every full DataFrame.
    with ExcelWriter(ruta_guardado) as writer:
        for pestaña, df in zip(Pestañas, Data):
            df.to_excel(writer, sheet_name=pestaña)
def run_data_covid():
    """Build a combined Google + Apple COVID-19 mobility dataset per country.

    Steps:
      1. Render https://covid19.apple.com/mobility in a local Chrome via
         selenium (the CSV download link is generated client-side) and
         scrape the CSV URL from the rendered page.
      2. Stream Google's Global_Mobility_Report.csv in chunks and average
         it per (country_region, date).
      3. Download Apple's CSV, keep country-level rows, re-base values to 0
         (Apple publishes them around 100) and join them to the Google data.
      4. Compute a 'Mobility Index' (row mean of all mobility columns),
         add regional aggregates, and persist the result to
         'Scheduler/Movilidad_desagrada.xlsx' and 'Scheduler/dict_movilidad.npy'.

    Side effects: launches Chrome, performs several HTTP downloads, writes
    two files under Scheduler/.  Returns None.
    """
    # --- 1. Scrape the Apple CSV download link -----------------------------
    options = webdriver.ChromeOptions()
    options.binary_location = r'C:/Program Files/Google/Chrome/Application/chrome.exe'
    path_to_chromedriver = r'C:/Users/bullm/larrainvial.com/Equipo Quant - Area Estrategias Cuantitativas 2.0/Codigos\Data Alternativa/Transcripts/chromedriver.exe'
    browser = webdriver.Chrome(executable_path=path_to_chromedriver,
                               chrome_options=options)
    try:
        browser.get('https://covid19.apple.com/mobility')
        soup = BeautifulSoup(browser.page_source, "html.parser")
        # The second <a> tag of the rendered page holds the CSV href.
        link = str(soup.find_all("a")[1]).split('"')[1]
    finally:
        # BUG FIX: the driver was never closed, leaking a Chrome process on
        # every run.
        browser.quit()
    # (Removed: an unused plain requests.get of the page and a second
    # download of the landing page parsed with lxml — both dead code.)

    # --- 2. Google mobility report (chunked, memory-bounded) ---------------
    mobility_cols = ['retail_and_recreation_percent_change_from_baseline',
                     'grocery_and_pharmacy_percent_change_from_baseline',
                     'parks_percent_change_from_baseline',
                     'transit_stations_percent_change_from_baseline',
                     'workplaces_percent_change_from_baseline']
    # Consistency fix: the original dtype map omitted the grocery column,
    # leaving it float64 while the rest were float32.
    dtypes = {col: 'float32' for col in mobility_cols}
    reader = pd.read_csv(
        'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv',
        usecols=['country_region', 'date'] + mobility_cols,
        dtype=dtypes, chunksize=150000)
    chunks = []
    # BUG FIX: the original pd.concat-ed inside the loop (quadratic copying);
    # collect the chunks and concatenate once.  The 65-chunk cap (~9.75M
    # rows) bounds memory — TODO confirm it still covers the full history.
    for n_chunk, chunk in enumerate(reader):
        if n_chunk == 65:
            break
        chunks.append(chunk)
    data_agg = pd.concat(chunks, ignore_index=True)
    # Average sub-regional rows down to one row per (country, date).
    data_agg = data_agg.groupby(['country_region', 'date']).mean()
    data_agg.columns = data_agg.columns.str.replace(
        '_percent_change_from_baseline', '_google')

    # --- 3. Apple mobility CSV ---------------------------------------------
    # BUG FIX: error_bad_lines=False was removed in pandas 2.0; on_bad_lines
    # (pandas >= 1.3) is the supported equivalent.
    contenido = requests.get(link).content
    data_app = pd.read_csv(io.StringIO(contenido.decode('utf-8')),
                           on_bad_lines='skip')
    # Keep only country-level rows and drop metadata columns.
    data_app = data_app.loc[data_app['geo_type'] == 'country/region']
    data_app = data_app.drop(columns=['geo_type', 'country',
                                      'alternative_name', 'sub-region'])
    # Wide (one column per date) -> long, indexed by (region, date); Apple
    # publishes values relative to a 100 baseline, so shift to 0 like Google.
    data_app = data_app.set_index(['region', 'transportation_type']).stack()
    data_app = data_app.unstack(level='transportation_type') - 100
    data_app.index.names = data_agg.index.names
    data_agg = data_agg.join(data_app)

    # --- 4. Mobility index + regional aggregates ---------------------------
    mob_idx_cols = ['retail_and_recreation_google', 'grocery_and_pharmacy_google',
                    'parks_google', 'transit_stations_google',
                    'workplaces_google', 'driving', 'transit', 'walking']
    data_agg['Mobility Index'] = data_agg[mob_idx_cols].mean(axis=1)

    regiones = {
        'Latam': ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico', 'Peru'],
        'Europa': ['Italy', 'Spain', 'Germany', 'United Kingdom', 'France'],
        'Asia Emergente': ['South Korea', 'Taiwan', 'Hong Kong', 'India',
                           'Thailand', 'Indonesia'],
        'USA': ['United States'],
    }
    data_dict = {}
    for col in data_agg.columns:
        # Countries as columns, 7-day rolling mean (min. 3 observations).
        df = data_agg[col].unstack().T.rolling(7, 3).mean()
        for region, paises in regiones.items():
            df[region] = df[paises].mean(axis=1)
        data_dict[col] = df
    GenerarExcel("Scheduler/Movilidad_desagrada.xlsx",
                 list(data_dict.keys()), list(data_dict.values()))
    np.save('Scheduler/dict_movilidad.npy', data_dict)
    # (Removed dead code: an unused 'yesterday' date string, a pointless
    # f''+link concatenation, and leftover .info()/print debug output.)