File size: 2,549 Bytes
fabbccf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import pandas as pd
import numpy as np


def get_time_features(data):
    """Derive calendar and cyclical features from ``data['timestamp']``.

    Args:
        data: DataFrame with a datetime-like ``timestamp`` column (it must
            support the ``.dt`` accessor). The input is not modified.

    Returns:
        A new DataFrame holding, in this order:
        ``timestamp``, ``hour``, ``day_of_week``, ``day``, ``month``,
        ``season`` (0 = Mar-May, 1 = Jun-Aug, 2 = Sep-Nov, 3 = otherwise),
        a ``<name>_sin`` / ``<name>_cos`` pair for each of those five
        calendar columns, ``part_of_day`` (0 = 06-11h, 1 = 12-17h,
        2 = 18-21h, 3 = otherwise), ``is_working_hours`` (1 for 09-16h)
        and ``is_weekend`` (1 when ``day_of_week >= 5``).
    """
    stamps = data['timestamp']

    df = pd.DataFrame()
    df['timestamp'] = stamps
    df['hour'] = stamps.dt.hour
    df['day_of_week'] = stamps.dt.dayofweek
    df['day'] = stamps.dt.day
    df['month'] = stamps.dt.month

    month = df['month']
    df['season'] = np.select(
        [month.between(3, 5), month.between(6, 8), month.between(9, 11)],
        [0, 1, 2],
        default=3,
    )

    # Use the natural length of each cycle as the period. Dividing by the
    # maximum observed in the batch makes the encoding depend on which values
    # happen to be present, maps the first and last value of a cycle to the
    # same point (e.g. hour 0 and hour 23), and yields 0/0 = NaN when a
    # column only contains zeros (e.g. a batch made of Mondays only).
    periods = {
        'hour': 24,
        'day_of_week': 7,
        'day': 31,
        'month': 12,
        'season': 4,
    }
    for name, period in periods.items():
        angle = 2 * np.pi * df[name] / period
        df[f'{name}_sin'] = np.sin(angle)
        df[f'{name}_cos'] = np.cos(angle)

    hour = df['hour']
    df['part_of_day'] = np.select(
        [hour.between(6, 11), hour.between(12, 17), hour.between(18, 21)],
        [0, 1, 2],
        default=3,
    )
    # Working hours are the half-open interval [9, 17).
    df['is_working_hours'] = ((hour >= 9) & (hour < 17)).astype('int64')

    # dayofweek counts Monday as 0, so 5 and 6 are Saturday and Sunday.
    df['is_weekend'] = (df['day_of_week'] >= 5).astype('int64')

    return df


def get_ts_features(data, time, column):
    """Compute trailing-window statistics of ``column`` over ``time`` hours.

    Args:
        data: DataFrame containing a ``timestamp`` column and ``column``.
            The input is not modified.
        time: Window length in hours. It is also used as the suffix of the
            generated column names.
        column: Name of the value column to aggregate.

    Returns:
        DataFrame with a fresh positional index, one row per input row in
        the input order, and the columns ``timestamp``, ``exact_<time>``
        (the raw value), ``mean_<time>``, ``median_<time>``, ``std_<time>``,
        ``min_<time>`` and ``max_<time>``. Missing values, including the
        standard deviation of a single-sample window, are replaced by -1.

    Note:
        pandas time-based rolling windows expect a monotonic datetime index,
        so ``data`` should already be ordered by ``timestamp``.
    """
    series = data[['timestamp', column]].set_index('timestamp')[column]

    # Pass the span as a Timedelta instead of an '<n>H' string: the uppercase
    # 'H' alias is deprecated since pandas 2.2, while a Timedelta means the
    # same thing on every version. closed='both' keeps both window edges, so
    # a sample lying exactly ``time`` hours back is still included.
    rolling_window = series.rolling(pd.Timedelta(hours=time), closed='both')

    df = pd.DataFrame(index=series.index)
    df[f'exact_{time}'] = series
    df[f'mean_{time}'] = rolling_window.mean()
    df[f'median_{time}'] = rolling_window.median()
    df[f'std_{time}'] = rolling_window.std()
    df[f'min_{time}'] = rolling_window.min()
    df[f'max_{time}'] = rolling_window.max()

    # -1 is the sentinel for "no value", kept for consistency with the
    # original implementation.
    return df.fillna(-1).reset_index()


def get_all_ts_features(data, column, windows=(1, 3, 6, 12, 24, 48, 96, 192)):
    """Collect rolling statistics of ``column`` for several look-back windows.

    Args:
        data: DataFrame containing a ``timestamp`` column and ``column``.
        column: Name of the value column to aggregate.
        windows: Window lengths in hours; ``get_ts_features`` is called once
            per entry. The default reproduces the previously hard-coded
            list (1 hour up to 8 days).

    Returns:
        DataFrame with a positional index, one row per input row, holding
        ``timestamp`` followed by the statistic columns of every window.
    """
    timestamps = data['timestamp'].reset_index(drop=True)
    blocks = [timestamps.to_frame()]
    for hours in windows:
        stats = get_ts_features(data, hours, column)
        # get_ts_features returns its rows in input order with a fresh
        # positional index, so the frames line up by position. Joining them
        # with a key-based merge on 'timestamp' would multiply rows at every
        # step as soon as a timestamp occurs more than once.
        blocks.append(stats.drop(columns='timestamp'))

    return pd.concat(blocks, axis=1)


def get_all_features(df, devices):
    """Build one feature table per device column.

    Args:
        df: DataFrame with a ``timestamp`` column and one value column per
            entry of ``devices``.
        devices: Iterable of column names in ``df``.

    Returns:
        Dict mapping each device name to a DataFrame that combines the
        output of ``get_time_features`` with that device's output of
        ``get_all_ts_features`` (joined on ``timestamp``), plus a constant
        ``type`` column set to 0. The ``timestamp`` column itself is dropped.
    """
    per_device = {name: pd.DataFrame() for name in devices}

    # The calendar features do not depend on the device, so compute them once.
    time_part = get_time_features(df)

    for name in devices:
        series_part = get_all_ts_features(df, name)
        joined = time_part.merge(series_part, on='timestamp', how='left')
        joined['type'] = 0
        pieces = [per_device[name], joined.drop(['timestamp'], axis=1)]
        per_device[name] = pd.concat(pieces, ignore_index=True)

    return per_device