#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Sep 23 09:27:21 2021

@author: benjaminull
"""

import io

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service



def GenerarExcel(ruta_guardado, Pestañas, Data):
    # Write each DataFrame in Data to its own sheet, named after the
    # matching entry in Pestañas. The context manager saves and closes
    # the file, replacing the deprecated writer.save() and the manual
    # openpyxl workbook setup, which ExcelWriter overwrote anyway.
    with pd.ExcelWriter(ruta_guardado, engine='openpyxl') as writer:
        for pestaña, df in zip(Pestañas, Data):
            df.to_excel(writer, sheet_name=pestaña)

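# Usage sketch for GenerarExcel (hypothetical file and sheet names, shown
# for illustration only):
#
#   df_a = pd.DataFrame({'x': [1, 2]})
#   df_b = pd.DataFrame({'y': [3, 4]})
#   GenerarExcel('ejemplo.xlsx', ['HojaA', 'HojaB'], [df_a, df_b])
#
# Each DataFrame lands on the sheet whose name shares its position in the
# Pestañas list.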

def run_data_covid():
    # Render the Apple mobility page in a real Chrome session, since its
    # CSV download link is generated client-side.
    options = webdriver.ChromeOptions()
    options.binary_location = r'C:/Program Files/Google/Chrome/Application/chrome.exe'
    path_to_chromedriver = r'C:/Users/bullm/larrainvial.com/Equipo Quant - Area Estrategias Cuantitativas 2.0/Codigos\Data Alternativa/Transcripts/chromedriver.exe'
    browser = webdriver.Chrome(service=Service(path_to_chromedriver),
                               options=options)
    # Go to the target page and parse the rendered HTML.
    url = 'https://covid19.apple.com/mobility'
    browser.get(url)
    html = browser.page_source
    browser.quit()
    soup = BeautifulSoup(html, "html.parser")
    # The CSV download URL is the href of the page's second anchor.
    link = soup.find_all("a")[1].get("href")
    # Stream Google's Global Mobility Report in chunks to bound peak
    # memory; the loop stops after 65 chunks of 150,000 rows each.
    mobility_cols = ['retail_and_recreation_percent_change_from_baseline',
                     'grocery_and_pharmacy_percent_change_from_baseline',
                     'parks_percent_change_from_baseline',
                     'transit_stations_percent_change_from_baseline',
                     'workplaces_percent_change_from_baseline']
    data_agg = pd.DataFrame()
    i = 0
    for chunk in pd.read_csv(
            'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv',
            usecols=['country_region', 'date'] + mobility_cols,
            dtype={col: 'float32' for col in mobility_cols},
            chunksize=150000):
        if i == 65:
            break
        i += 1
        data_agg = pd.concat([data_agg, chunk], ignore_index=True)
    data_agg.info(memory_usage="deep")
    # Average duplicate (country, date) rows (sub-regional detail) into a
    # single national value, and tag the columns as Google series.
    data_agg.set_index(['country_region', 'date'], inplace=True)
    data_agg = data_agg.groupby(level=[0, 1]).mean()
    data_agg.columns = data_agg.columns.str.replace(
        '_percent_change_from_baseline', '_google')
    # Download the Apple mobility CSV from the link scraped above.
    content = requests.get(link).content
    data_app = pd.read_csv(io.StringIO(content.decode('utf-8')),
                           on_bad_lines='skip')
    # Keep only the country-level rows and drop metadata columns.
    data_app.info(memory_usage="deep")
    data_app = data_app.loc[data_app['geo_type'] == 'country/region']
    data_app = data_app.drop(columns=['geo_type', 'country',
                                      'alternative_name', 'sub-region'])
    # Reshape the Apple data to a (region, date) index with one column per
    # transportation type, re-based so 0 means "at baseline" (Apple's
    # series start at 100).
    data_app = data_app.set_index(['region', 'transportation_type']).stack()
    data_app = data_app.unstack(level='transportation_type') - 100
    # Align index names with the Google frame so the join matches on
    # (country, date).
    data_app.index.names = data_agg.index.names
    data_agg = data_agg.join(data_app)
    mob_idx_cols = ['retail_and_recreation_google', 'grocery_and_pharmacy_google',
                    'parks_google', 'transit_stations_google',
                    'workplaces_google', 'driving', 'transit', 'walking']
    # Composite index: the row-wise mean of the eight Google/Apple series.
    data_agg['Mobility Index'] = data_agg[mob_idx_cols].mean(axis=1)
    regiones = {
        'Latam': ['Argentina', 'Brazil', 'Chile', 'Colombia', 'Mexico',
                  'Peru'],
        'Europa': ['Italy', 'Spain', 'Germany', 'United Kingdom', 'France'],
        'Asia Emergente': ['South Korea', 'Taiwan', 'Hong Kong', 'India',
                           'Thailand', 'Indonesia'],
        'USA': ['United States'],
        # 'Israel': ['Israel'],
    }
    data_dict = {}
    for col in data_agg.columns:
        # Pivot each series to a dates x countries table, smooth with a
        # 7-day rolling mean (min 3 observations), then append region
        # averages as extra columns.
        df = data_agg[col].unstack().T.rolling(7, 3).mean()
        for region, paises in regiones.items():
            df[region] = df[paises].mean(axis=1)
        data_dict[col] = df
    GenerarExcel("Scheduler/Movilidad_desagrada.xlsx", list(data_dict.keys()),
                 list(data_dict.values()))
    # np.save pickles the dict; read it back with
    # np.load(..., allow_pickle=True).item().
    np.save('Scheduler/dict_movilidad.npy', data_dict)
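

if __name__ == '__main__':
    # Minimal entry-point sketch, assuming this script is meant to run
    # standalone (e.g. from the scheduler the output paths point at); the
    # original file defines the functions without calling them.
    run_data_covid()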