Spaces:
Sleeping
Sleeping
File size: 2,549 Bytes
fabbccf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import pandas as pd
import numpy as np
def get_time_features(data):
    """Build calendar and cyclical time features from ``data['timestamp']``.

    Args:
        data: DataFrame with a ``timestamp`` column that supports the ``.dt``
            accessor (i.e. datetime-like values).

    Returns:
        A new DataFrame sharing ``data``'s index with these columns, in order:

        * ``timestamp`` - copied from the input.
        * ``hour``, ``day_of_week``, ``day``, ``month`` - taken from ``.dt``.
        * ``season`` - 0 for months 3-5, 1 for 6-8, 2 for 9-11, 3 otherwise.
        * ``<name>_sin`` / ``<name>_cos`` for each of the five columns above.
        * ``part_of_day`` - 0 for hours 6-11, 1 for 12-17, 2 for 18-21,
          3 otherwise.
        * ``is_working_hours`` - 1 when ``9 <= hour < 17``, else 0.
        * ``is_weekend`` - 1 when ``day_of_week >= 5``, else 0.
    """
    # Length of each cycle. These are constants on purpose: dividing by the
    # largest value seen in the batch (a) maps the first and last value of a
    # zero-based cycle onto the same point (hour 0 == hour 23), (b) makes the
    # encoding depend on which rows happen to be in the batch, and (c) yields
    # NaN (0 / 0) when every value in the batch is zero.
    # Insertion order matters: it fixes the order of the output columns.
    periods = {
        'hour': 24,
        'day_of_week': 7,
        'day': 31,
        'month': 12,
        'season': 4,
    }

    stamps = data['timestamp']
    df = pd.DataFrame({'timestamp': stamps})
    df['hour'] = stamps.dt.hour
    df['day_of_week'] = stamps.dt.dayofweek
    df['day'] = stamps.dt.day
    df['month'] = stamps.dt.month

    month = df['month']
    df['season'] = np.select(
        [
            (month >= 3) & (month <= 5),
            (month >= 6) & (month <= 8),
            (month >= 9) & (month <= 11),
        ],
        [0, 1, 2],
        default=3,
    )

    # Project every periodic column onto the unit circle.
    for name, period in periods.items():
        angle = 2 * np.pi * df[name] / period
        df[f'{name}_sin'] = np.sin(angle)
        df[f'{name}_cos'] = np.cos(angle)

    hour = df['hour']
    df['part_of_day'] = np.select(
        [
            (hour >= 6) & (hour <= 11),
            (hour >= 12) & (hour <= 17),
            (hour >= 18) & (hour <= 21),
        ],
        [0, 1, 2],
        default=3,
    )
    df['is_working_hours'] = ((hour >= 9) & (hour < 17)).astype(int)
    df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
    return df
def get_ts_features(data, time, column):
    """Compute trailing-window statistics of one column over ``time`` hours.

    Args:
        data: DataFrame with a ``timestamp`` column and the column named by
            ``column``. NOTE(review): pandas time-based rolling windows need
            a monotonic datetime-like index, so rows are presumably expected
            to be sorted by ``timestamp`` - confirm against callers.
        time: Window length in hours. It is also embedded verbatim in the
            output column names.
        column: Name of the column to aggregate.

    Returns:
        DataFrame with a ``timestamp`` column followed by ``exact_<time>``
        (the raw value) and ``mean_``, ``median_``, ``std_``, ``min_``,
        ``max_<time>``. Missing values (for example the standard deviation
        of a single-row window) are replaced with -1.
    """
    series = data[['timestamp', column]].set_index('timestamp')[column]

    # A Timedelta describes the same span as the f'{time}H' offset string but
    # does not rely on the uppercase 'H' alias, which recent pandas releases
    # deprecate. closed='both' keeps both window endpoints inclusive.
    window = series.rolling(pd.Timedelta(hours=time), closed='both')

    df = pd.DataFrame(index=series.index)
    df[f'exact_{time}'] = series
    for stat in ('mean', 'median', 'std', 'min', 'max'):
        df[f'{stat}_{time}'] = getattr(window, stat)()

    # -1 is the sentinel for "no value"; see the Returns section.
    return df.fillna(-1).reset_index()
def get_all_ts_features(data, column, windows=None):
    """Collect rolling statistics of ``column`` for several window lengths.

    Args:
        data: DataFrame with a ``timestamp`` column and the column named by
            ``column``; it is passed unchanged to ``get_ts_features``.
        column: Name of the column to aggregate.
        windows: Optional iterable of window lengths in hours. When omitted,
            the windows are 1, 3, 6, 12, 24, 48, 96 and 192 hours.

    Returns:
        DataFrame with a ``timestamp`` column followed by the feature columns
        that ``get_ts_features`` produces for each window, in window order.

    NOTE(review): each window's features are left-joined on ``timestamp``,
    so repeated timestamps in ``data`` would multiply rows on every join -
    confirm that timestamps are unique.
    """
    if windows is None:
        # 1 h up to 8 days.
        windows = (1, 3, 6, 12, 24, 24 * 2, 24 * 4, 24 * 8)

    res = pd.DataFrame()
    res['timestamp'] = data['timestamp']
    for hours in windows:
        window_features = get_ts_features(data, hours, column)
        res = res.merge(window_features, on='timestamp', how='left')
    return res
def get_all_features(df, devices):
    """Assemble the full feature table for every device column.

    Args:
        df: DataFrame with a ``timestamp`` column and one column per entry
            of ``devices``.
        devices: Iterable of column names in ``df``.

    Returns:
        Dict mapping each device name to a DataFrame that holds the output
        of ``get_time_features`` joined on ``timestamp`` with the output of
        ``get_all_ts_features`` for that device, plus a ``type`` column set
        to 0. The ``timestamp`` column itself is dropped from the result.
    """
    # Start every device with an empty frame; its rows are appended below.
    per_device = {device: pd.DataFrame() for device in devices}

    # Calendar features do not depend on the device, so compute them once.
    time_features = get_time_features(df)

    for device in devices:
        rolling_features = get_all_ts_features(df, device)
        merged = time_features.merge(rolling_features, on='timestamp', how='left')
        # NOTE(review): meaning of the constant 0 is not visible here -
        # presumably a class/source label; confirm with the consumer.
        merged['type'] = 0
        without_timestamp = merged.drop(columns=['timestamp'])
        per_device[device] = pd.concat(
            [per_device[device], without_timestamp],
            ignore_index=True,
        )

    return per_device
|