import pandas as pd
import numpy as np

# Length of the cycle for each calendar feature that gets a sin/cos encoding.
# A fixed period (rather than the largest value observed in the data) keeps
# the encoding identical across datasets, keeps the last value of a cycle
# distinct from the first (hour 23 vs. hour 0), and avoids 0/0 when a column
# happens to be all zeros. Insertion order defines the output column order.
_CYCLE_PERIODS = {
    'hour': 24,
    'day_of_week': 7,
    'day': 31,
    'month': 12,
    'season': 4,
}

# Default look-back windows, in hours, used by get_all_ts_features.
_WINDOW_HOURS = (1, 3, 6, 12, 24, 24 * 2, 24 * 4, 24 * 8)


def get_time_features(data: pd.DataFrame) -> pd.DataFrame:
    """Derive calendar features from the ``timestamp`` column of *data*.

    Args:
        data: Frame with a datetime-typed ``timestamp`` column (the ``.dt``
            accessor is used on it).

    Returns:
        A new frame indexed like *data* with, in order: ``timestamp``,
        ``hour``, ``day_of_week``, ``day``, ``month``, ``season``, a
        ``<name>_sin`` / ``<name>_cos`` pair for each of those five features,
        then ``part_of_day``, ``is_working_hours`` and ``is_weekend``.

        Label encodings:
          * ``season``: months 3-5 -> 0, 6-8 -> 1, 9-11 -> 2, otherwise 3.
          * ``part_of_day``: hours 6-11 -> 0, 12-17 -> 1, 18-21 -> 2,
            otherwise 3.
          * ``is_working_hours``: 1 when 9 <= hour < 17, else 0.
          * ``is_weekend``: 1 when day_of_week >= 5, else 0.
    """
    stamps = data['timestamp']

    df = pd.DataFrame()
    df['timestamp'] = stamps
    df['hour'] = stamps.dt.hour
    df['day_of_week'] = stamps.dt.dayofweek
    df['day'] = stamps.dt.day
    df['month'] = stamps.dt.month

    month = df['month']
    df['season'] = np.select(
        [
            month.between(3, 5).to_numpy(),
            month.between(6, 8).to_numpy(),
            month.between(9, 11).to_numpy(),
        ],
        [0, 1, 2],
        default=3,
    )

    # Cyclical encoding: map each value onto the unit circle using the true
    # length of its cycle so that neighbouring values stay neighbours across
    # the wrap-around point.
    for name, period in _CYCLE_PERIODS.items():
        angle = 2 * np.pi * df[name] / period
        df[f'{name}_sin'] = np.sin(angle)
        df[f'{name}_cos'] = np.cos(angle)

    hour = df['hour']
    df['part_of_day'] = np.select(
        [
            hour.between(6, 11).to_numpy(),
            hour.between(12, 17).to_numpy(),
            hour.between(18, 21).to_numpy(),
        ],
        [0, 1, 2],
        default=3,
    )
    df['is_working_hours'] = ((hour >= 9) & (hour < 17)).astype(int)
    df['is_weekend'] = (df['day_of_week'] >= 5).astype(int)
    return df


def get_ts_features(data: pd.DataFrame, time, column: str) -> pd.DataFrame:
    """Compute rolling statistics of *column* over a trailing time window.

    Args:
        data: Frame with a ``timestamp`` column and the column named by
            *column*. The timestamps are used as the rolling index, so pandas
            requires them to be datetime-like and monotonic.
        time: Window length in hours. It is also used verbatim as the suffix
            of the generated column names.
        column: Name of the value column to aggregate.

    Returns:
        A frame with a default integer index, one row per input row in the
        original order, and the columns ``timestamp``, ``exact_<time>`` (the
        raw value), ``mean_<time>``, ``median_<time>``, ``std_<time>``,
        ``min_<time>`` and ``max_<time>``. Every NaN (including NaNs in the
        raw values and the undefined std of a single-sample window) is
        replaced by -1.
    """
    series = data[['timestamp', column]].set_index('timestamp')[column]

    # A Timedelta window is equivalent to the '<time>H' offset string but does
    # not depend on the uppercase 'H' alias, which pandas deprecated in 2.2.
    # closed='both' includes the sample lying exactly `time` hours back.
    rolling_window = series.rolling(pd.Timedelta(hours=time), closed='both')

    df = pd.DataFrame(index=series.index)
    df[f'exact_{time}'] = series
    df[f'mean_{time}'] = rolling_window.mean()
    df[f'median_{time}'] = rolling_window.median()
    df[f'std_{time}'] = rolling_window.std()
    df[f'min_{time}'] = rolling_window.min()
    df[f'max_{time}'] = rolling_window.max()

    # -1 is the sentinel for "no value available".
    df = df.fillna(-1)
    return df.reset_index()


def get_all_ts_features(
    data: pd.DataFrame, column: str, windows=_WINDOW_HOURS
) -> pd.DataFrame:
    """Build rolling statistics of *column* for several window lengths.

    Args:
        data: Frame with a ``timestamp`` column and the column named by
            *column* (see ``get_ts_features`` for the requirements).
        column: Name of the value column to aggregate.
        windows: Iterable of window lengths in hours. Defaults to
            1, 3, 6, 12, 24, 48, 96 and 192 hours.

    Returns:
        A frame with a default integer index whose first column is
        ``timestamp`` followed by the six statistic columns of every window,
        in the order the windows were given. It has exactly one row per input
        row, in the input order.
    """
    frames = [data[['timestamp']].reset_index(drop=True)]
    for hours in windows:
        window_features = get_ts_features(data, hours, column)
        # Each per-window frame is already row-aligned with `data`, so the
        # blocks are glued together by position. Joining on 'timestamp'
        # instead would multiply rows whenever a timestamp occurs twice.
        frames.append(window_features.drop(columns='timestamp'))
    return pd.concat(frames, axis=1)


def get_all_features(df: pd.DataFrame, devices) -> dict:
    """Assemble the full feature table for every device column.

    Args:
        df: Frame with a ``timestamp`` column and one value column per device.
        devices: Iterable of column names in *df* to build features for.

    Returns:
        A dict mapping each device name to a frame (default integer index)
        holding the calendar features, the rolling statistics of that device
        for all default windows, and a constant ``type`` column set to 0.
        The ``timestamp`` column is not included.
    """
    # Calendar features depend only on the timestamps, so compute them once.
    time_features = (
        get_time_features(df).drop(columns='timestamp').reset_index(drop=True)
    )

    res = dict()
    for device in devices:
        ts_features = get_all_ts_features(df, device).drop(columns='timestamp')
        # Both blocks have one row per input row in input order, so they are
        # combined by position; this stays correct with repeated timestamps.
        combined = pd.concat([time_features, ts_features], axis=1)
        combined['type'] = 0
        if device in res:
            # A device listed more than once has its rows appended again,
            # matching the accumulation behaviour of the previous version.
            res[device] = pd.concat([res[device], combined], ignore_index=True)
        else:
            res[device] = combined
    return res