# Spaces: Sleeping  (hosting-page status text captured with the file; not part of the module)
import numpy as np
import pandas as pd
def get_time_features(data):
    """Derive calendar and cyclical features from the ``timestamp`` column.

    Args:
        data: DataFrame with a ``timestamp`` column that supports the ``.dt``
            accessor (i.e. datetime-like values).

    Returns:
        A new DataFrame (``data`` is not modified) with these columns, in
        order: ``timestamp``, ``hour``, ``day_of_week``, ``day``, ``month``,
        ``season``, a ``<name>_sin`` / ``<name>_cos`` pair for each of those
        five components, then ``part_of_day``, ``is_working_hours`` and
        ``is_weekend``.
    """
    # Length of one full cycle for every calendar component. The encoding
    # must divide by the cycle length, not by the largest value observed in
    # the data: dividing hour 23 by a max of 23 maps it onto the same point
    # as hour 0, makes the scale depend on which rows happen to be present,
    # and yields NaN (0 / 0) when the observed maximum is 0.
    periods = {
        'hour': 24,        # values 0..23
        'day_of_week': 7,  # values 0..6 (pandas: Monday=0)
        'day': 31,         # values 1..31
        'month': 12,       # values 1..12
        'season': 4,       # values 0..3, see season_of below
    }

    def season_of(month):
        # Months 3-5 -> 0, 6-8 -> 1, 9-11 -> 2, everything else -> 3.
        if 3 <= month <= 5:
            return 0
        if 6 <= month <= 8:
            return 1
        if 9 <= month <= 11:
            return 2
        return 3

    def part_of_day(hour):
        # Hours 6-11 -> 0, 12-17 -> 1, 18-21 -> 2, everything else -> 3.
        if 6 <= hour <= 11:
            return 0
        if 12 <= hour <= 17:
            return 1
        if 18 <= hour <= 21:
            return 2
        return 3

    stamps = data['timestamp']

    df = pd.DataFrame()
    df['timestamp'] = stamps
    df['hour'] = stamps.dt.hour
    df['day_of_week'] = stamps.dt.dayofweek
    df['day'] = stamps.dt.day
    df['month'] = stamps.dt.month
    df['season'] = df['month'].apply(season_of)

    # Project each component onto the unit circle so that the end of a cycle
    # sits next to its start. Dict order matches the column order above.
    for name, period in periods.items():
        angle = 2 * np.pi * df[name] / period
        df[f'{name}_sin'] = np.sin(angle)
        df[f'{name}_cos'] = np.cos(angle)

    df['part_of_day'] = df['hour'].apply(part_of_day)
    # Flag is 1 for hours 9..16 (upper bound 17 is exclusive).
    df['is_working_hours'] = df['hour'].apply(lambda hour: 1 if 9 <= hour < 17 else 0)
    # Flag is 1 for day_of_week values 5 and 6.
    df['is_weekend'] = df['day_of_week'].apply(lambda dow: 1 if dow >= 5 else 0)
    return df
def get_ts_features(data, time, column):
    """Compute trailing time-window statistics for one column.

    Args:
        data: DataFrame containing a ``timestamp`` column and ``column``.
            The timestamps become the index of a time-based rolling window,
            which pandas requires to be datetime-like and monotonic.
        time: Window length in hours.
        column: Name of the column to summarise.

    Returns:
        A DataFrame with a ``timestamp`` column followed by ``exact_<time>``
        (the raw value) and ``mean_``, ``median_``, ``std_``, ``min_`` and
        ``max_<time>`` over the window ending at each row. Missing values are
        replaced by -1; this includes ``std`` for windows holding a single
        observation. ``data`` itself is not modified.
    """
    indexed = data[['timestamp', column]].set_index('timestamp')
    series = indexed[column]

    # Pass the span as a Timedelta rather than the string f'{time}H': the
    # upper-case 'H' frequency alias is deprecated in recent pandas releases,
    # whereas a Timedelta describes the same window on old and new versions.
    # closed='both' keeps both window endpoints inclusive.
    rolling_window = series.rolling(pd.Timedelta(hours=time), closed='both')

    df = pd.DataFrame(index=indexed.index)
    df[f'exact_{time}'] = series
    df[f'mean_{time}'] = rolling_window.mean()
    df[f'median_{time}'] = rolling_window.median()
    df[f'std_{time}'] = rolling_window.std()
    df[f'min_{time}'] = rolling_window.min()
    df[f'max_{time}'] = rolling_window.max()

    # -1 is the sentinel for "no value" in every feature column.
    df = df.fillna(-1)
    return df.reset_index()
def get_all_ts_features(data, column):
    """Gather the rolling statistics of ``column`` for every window length.

    Calls ``get_ts_features`` once per window (1, 3, 6, 12, 24, 48, 96 and
    192 hours) and left-joins each result onto the timestamps of ``data``.

    Args:
        data: DataFrame with a ``timestamp`` column and ``column``.
        column: Name of the column to summarise.

    Returns:
        A DataFrame with ``timestamp`` plus the feature columns produced for
        each window length.
    """
    window_hours = (1, 3, 6, 12, 24, 48, 96, 192)

    combined = pd.DataFrame({'timestamp': data['timestamp']})
    for hours in window_hours:
        combined = combined.merge(
            get_ts_features(data, hours, column),
            on='timestamp',
            how='left',
        )
    return combined
def get_all_features(df, devices):
    """Build one feature table per device column.

    The calendar features from ``get_time_features`` are computed once and
    left-joined on ``timestamp`` with the rolling-window features that
    ``get_all_ts_features`` produces for each device.

    Args:
        df: DataFrame with a ``timestamp`` column and one column per device.
        devices: Iterable of column names in ``df`` to build features for.

    Returns:
        A dict mapping each device name to a DataFrame holding the joined
        features, a constant ``type`` column set to 0, and no ``timestamp``
        column.
    """
    # Start every device with an empty frame; rows are appended below.
    per_device = {name: pd.DataFrame() for name in devices}

    calendar_features = get_time_features(df)

    for name in devices:
        window_features = get_all_ts_features(df, name)
        merged = calendar_features.merge(window_features, on='timestamp', how='left')
        # NOTE(review): 'type' is always 0 here; its meaning is not visible
        # in this file - confirm with the consumer of these features.
        merged['type'] = 0
        feature_rows = merged.drop(columns=['timestamp'])
        per_device[name] = pd.concat(
            [per_device[name], feature_rows],
            ignore_index=True,
        )
    return per_device