#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 23 09:27:21 2021
@author: benjaminull
"""
import pandas as pd
from datetime import datetime
import numpy as np
from datetime import timedelta
import requests
import io
import openpyxl
from pandas import ExcelWriter
import requests
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
import requests
def GenerarExcel(ruta_guardado, Pestañas, Data):
    """Write each DataFrame in Data to its own sheet of one Excel file.

    Parameters
    ----------
    ruta_guardado : str
        Destination path of the .xlsx file.
    Pestañas : list of str
        Sheet names; Pestañas[i] names the sheet for Data[i].
    Data : list of pandas.DataFrame
        DataFrames to write, one per sheet.
    """
    # ExcelWriter creates the workbook itself, so the previous openpyxl
    # pre-creation of empty sheets (using the removed get_sheet_by_name /
    # remove_sheet APIs) and the extra wb.save() were unnecessary.
    # The context manager replaces the deprecated writer.save().
    with ExcelWriter(ruta_guardado) as writer:
        for pestaña, df in zip(Pestañas, Data):
            df.to_excel(writer, sheet_name=pestaña)
def run_data_covid():
    """Download Google and Apple COVID-19 mobility data, combine, persist.

    Steps:
      1. Render https://covid19.apple.com/mobility with Selenium (the CSV
         link is built client-side) and extract the download URL.
      2. Stream Google's Global Mobility Report CSV in bounded chunks and
         average it to one row per (country, date).
      3. Join the Apple per-country series, compute a composite
         'Mobility Index', add regional aggregates, and save the result to
         'Scheduler/Movilidad_desagrada.xlsx' and
         'Scheduler/dict_movilidad.npy'.
    """
    # --- Apple: locate the CSV download link --------------------------
    options = webdriver.ChromeOptions()
    options.binary_location = r'C:/Program Files/Google/Chrome/Application/chrome.exe'
    path_to_chromedriver = r'C:/Users/bullm/larrainvial.com/Equipo Quant - Area Estrategias Cuantitativas 2.0/Codigos\Data Alternativa/Transcripts/chromedriver.exe'
    # NOTE(review): executable_path/chrome_options are removed in
    # Selenium 4; kept for compatibility with the version in use here.
    browser = webdriver.Chrome(executable_path=path_to_chromedriver,
                               chrome_options=options)
    try:
        browser.get('https://covid19.apple.com/mobility')
        soup = BeautifulSoup(browser.page_source, "html.parser")
        # The CSV URL is the href of the page's second <a> tag.
        link = str(soup.find_all("a")[1]).split('"')[1]
    finally:
        # The original never closed the browser, leaking a Chrome process.
        browser.quit()

    # --- Google: stream the global report in bounded chunks -----------
    reader = pd.read_csv(
        'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv',
        usecols=['country_region', 'date',
                 'retail_and_recreation_percent_change_from_baseline',
                 'grocery_and_pharmacy_percent_change_from_baseline',
                 'parks_percent_change_from_baseline',
                 'transit_stations_percent_change_from_baseline',
                 'workplaces_percent_change_from_baseline'],
        dtype={"workplaces_percent_change_from_baseline": "float32",
               "parks_percent_change_from_baseline": "float32",
               "retail_and_recreation_percent_change_from_baseline":
                   "float32",
               'transit_stations_percent_change_from_baseline': "float32"},
        chunksize=150000)
    chunks = []
    for i, chunk in enumerate(reader):
        if i == 65:  # cap at 65 chunks (~9.75M rows) to bound memory
            break
        chunks.append(chunk)
    # Concatenate once instead of inside the loop (the original repeated
    # pd.concat per chunk, which is quadratic in total rows).
    data_agg = pd.concat(chunks, ignore_index=True) if chunks \
        else pd.DataFrame()
    data_agg.set_index(['country_region', 'date'], inplace=True)
    # Average sub-national rows up to one value per (country, date).
    data_agg = data_agg.groupby(level=[0, 1]).mean()
    data_agg.columns = data_agg.columns.str.replace(
        '_percent_change_from_baseline', '_google')

    # --- Apple: download and reshape the per-country series -----------
    content = requests.get(link).content
    # error_bad_lines is deprecated since pandas 1.3 (on_bad_lines='skip'
    # on newer versions); kept for the pandas version this file targets.
    data_app = pd.read_csv(io.StringIO(content.decode('utf-8')),
                           error_bad_lines=False)
    # Keep only country-level rows.
    data_app = data_app.loc[data_app['geo_type'] == 'country/region']
    data_app = data_app.drop(columns=['geo_type', 'country',
                                      'alternative_name', 'sub-region'])
    # Wide (one column per date) -> long (region, date) per transport
    # type, re-centered so 0 means "no change from baseline".
    data_app = data_app.set_index(['region', 'transportation_type']).stack()
    data_app = data_app.unstack(level='transportation_type') - 100
    data_app.index.names = data_agg.index.names

    # --- Combine, index, and aggregate by region ----------------------
    data_agg = data_agg.join(data_app)
    mob_idx_cols = ['retail_and_recreation_google',
                    'grocery_and_pharmacy_google',
                    'parks_google', 'transit_stations_google',
                    'workplaces_google', 'driving', 'transit', 'walking']
    data_agg['Mobility Index'] = data_agg[mob_idx_cols].mean(1)

    regiones = {
        'Latam': ['Argentina', 'Brazil', 'Chile', 'Colombia',
                  'Mexico', 'Peru'],
        'Europa': ['Italy', 'Spain', 'Germany', 'United Kingdom',
                   'France'],
        'Asia Emergente': ['South Korea', 'Taiwan', 'Hong Kong', 'India',
                           'Thailand', 'Indonesia'],
        'USA': ['United States'],
    }
    data_dict = {}
    for col in data_agg.columns:
        # Dates as rows, 7-day rolling mean (minimum 3 observations).
        df = data_agg[col].unstack().T.rolling(7, 3).mean()
        for region, paises in regiones.items():
            df[region] = df[paises].mean(1)
        data_dict[col] = df

    GenerarExcel("Scheduler/Movilidad_desagrada.xlsx",
                 list(data_dict.keys()), list(data_dict.values()))
    np.save('Scheduler/dict_movilidad.npy', data_dict)