File size: 4,120 Bytes
aaf6ffd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73a3abc
 
e62a721
 
 
 
 
 
 
1dcf872
1f82d2d
 
 
 
 
e62a721
1dcf872
 
e62a721
1dcf872
 
e62a721
 
 
1dcf872
e62a721
73a3abc
1f82d2d
 
 
05bf296
1f82d2d
 
05bf296
1f82d2d
 
05bf296
1f82d2d
05bf296
1f82d2d
 
 
05bf296
1f82d2d
 
 
 
 
 
 
 
aaf6ffd
 
 
 
 
 
 
03592e6
aaf6ffd
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import gradio as gr
from huggingface_hub import from_pretrained_keras
import pandas as pd
import numpy as np
import json
from matplotlib import pyplot as plt

# Load the training-time normalization statistics (mean/std) and the anomaly
# threshold.  A context manager guarantees the file handle is closed even if
# json.load raises; the original left the handle open for the process lifetime.
with open('scaler.json') as f:
    scaler = json.load(f)

# Sliding-window length the autoencoder was trained with; every sequence fed
# to the model must contain exactly this many steps.
TIME_STEPS = 288

# Generated training sequences for use in the model.
def create_sequences(values, time_steps=TIME_STEPS):
    """Slice *values* into overlapping windows of length *time_steps*.

    Returns a stacked array with one window per valid start offset, i.e.
    shape (len(values) - time_steps + 1, time_steps, ...).
    """
    windows = [
        values[start : start + time_steps]
        for start in range(len(values) - time_steps + 1)
    ]
    return np.stack(windows)


def normalize_data(data):
    """Standardize *data* using the training-time mean/std from ``scaler``."""
    mean, std = scaler["mean"], scaler["std"]
    return (data - mean) / std

def plot_test_data(df_test_value):
    """Return a matplotlib figure plotting the (normalized) input series."""
    figure, axis = plt.subplots(figsize=(12, 6))
    df_test_value.plot(ax=axis, legend=False)
    axis.set_xlabel("Time")
    axis.set_ylabel("Value")
    axis.set_title("Input Test Data")
    return figure

def get_anomalies(df_test_value):
    """Run the pretrained autoencoder over *df_test_value* and flag anomalies.

    Returns a 1-D boolean array with one entry per sliding window; True where
    the window's mean absolute reconstruction error exceeds the trained
    threshold stored in ``scaler``.
    """
    # Create sequences from test values, matching the training window length.
    x_test = create_sequences(df_test_value.values)

    # Load the model from the Hub once and reuse it on subsequent calls.
    # The original re-downloaded/reloaded the model on every request, which
    # dominated the latency of this function.
    model = getattr(get_anomalies, "_model", None)
    if model is None:
        model = from_pretrained_keras("keras-io/timeseries-anomaly-detection")
        get_anomalies._model = model

    # Mean absolute reconstruction error per window.
    x_test_pred = model.predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    test_mae_loss = test_mae_loss.reshape((-1))

    # A window is anomalous when its error exceeds the training threshold.
    anomalies = test_mae_loss > scaler["threshold"]
    return anomalies

def plot_anomalies(df_test_value, data, anomalies):
    """Plot *data* with detected anomalous points overlaid in red.

    Data point i is treated as anomalous when all windows covering samples
    (i - TIME_STEPS + 1) .. i were flagged in *anomalies*.
    """
    flagged_indices = [
        idx
        for idx in range(TIME_STEPS - 1, len(df_test_value) - TIME_STEPS + 1)
        if np.all(anomalies[idx - TIME_STEPS + 1 : idx])
    ]
    figure, axis = plt.subplots(figsize=(12, 6))
    data.plot(legend=False, ax=axis)
    data.iloc[flagged_indices].plot(legend=False, ax=axis, color="r")
    axis.set_xlabel("Time")
    axis.set_ylabel("Value")
    axis.set_title("Anomalous Data Points")
    return figure

def clean_data(df):
    """Return *df* normalized to the canonical ``timestamp``/``value`` layout.

    Accepts either a frame that already has ``timestamp`` and ``value``
    columns, or a raw labor-hours export with ``Date``, ``Hour`` and
    ``Hourly_Labor_Hours_Total`` columns.

    Returns:
        A new DataFrame with exactly two columns: ``timestamp`` (datetime64)
        and ``value``.

    Raises:
        ValueError: if neither expected column layout is present.
    """
    # Already in the canonical layout: just ensure timestamps are parsed.
    # Work on a copy so the caller's frame is not mutated.
    if "timestamp" in df.columns and "value" in df.columns:
        df = df.copy()
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        return df

    if {"Date", "Hour", "Hourly_Labor_Hours_Total"}.issubset(df.columns):
        # Combine "Date" and "Hour" into a single timestamp.  to_timedelta
        # rolls an Hour value of 24 into the next day automatically, so the
        # old special-case (`dt.hour == 24`) was dead code — `.dt.hour` only
        # ever yields 0..23 — and has been removed.
        timestamp = pd.to_datetime(df["Date"]) + pd.to_timedelta(
            df["Hour"].astype(int), unit="h"
        )

        # Build a fresh frame instead of renaming a column-sliced view in
        # place, which triggered pandas' SettingWithCopyWarning.
        return pd.DataFrame(
            {
                "timestamp": timestamp.dt.floor("h"),
                "value": df["Hourly_Labor_Hours_Total"],
            }
        )

    raise ValueError("Dataframe does not contain necessary columns.")

def master(file):
    """End-to-end pipeline: load a CSV, detect anomalies, return the plot.

    Returns either a matplotlib figure of the anomalous points, or an error
    string when the file holds fewer than TIME_STEPS rows.
    """
    # Load the uploaded CSV and normalize it to timestamp/value columns.
    frame = pd.read_csv(file.name)
    frame = clean_data(frame)

    # Re-parse timestamps after cleaning, then index the series by time.
    frame["timestamp"] = pd.to_datetime(frame["timestamp"])
    frame.set_index("timestamp", inplace=True)

    # Bail out early when there are too few rows to form a single window.
    if len(frame) < TIME_STEPS:
        return "Not enough data to create sequences. Need at least {} records.".format(TIME_STEPS)

    normalized = normalize_data(frame)

    # Figure of the raw input (created for parity; only the anomaly plot
    # is returned, matching the original behavior).
    plot1 = plot_test_data(normalized)

    anomalies = get_anomalies(normalized)
    return plot_anomalies(normalized, frame, anomalies)

# Build and launch the Gradio UI.  The gr.inputs / gr.outputs namespaces were
# deprecated and removed in Gradio 3+; components now live directly on `gr`.
# `master` returns a matplotlib Figure, so gr.Plot is the appropriate output
# component (gr.Image expects image data, not a Figure).
outputs = gr.Plot()

iface = gr.Interface(
    fn=master,
    inputs=gr.File(label="CSV File"),
    outputs=outputs,
    examples=["art_daily_jumpsup.csv", "labor_hourly_short.csv"],
    title="Timeseries Anomaly Detection Using an Autoencoder",
    description="Anomaly detection of timeseries data.",
)

iface.launch()