marik0 committed
Commit baaa327 · 1 Parent(s): 6fa0397

Created the demo

Files changed (2)
  1. app.py +148 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,148 @@
+ import numpy as np
+ import matplotlib
+ import matplotlib.pyplot as plt
+
+ from sklearn.ensemble import BaggingRegressor
+ from sklearn.tree import DecisionTreeRegressor
+
+ import gradio as gr
+
+ matplotlib.use('agg')
+
+ # Generate data
+ def f(x):
+     x = x.ravel()
+
+     return np.exp(-(x**2)) + 1.5 * np.exp(-((x - 2) ** 2))
+
+
+ def generate(n_samples, noise, n_repeat=1):
+     X = np.random.rand(n_samples) * 10 - 5
+     X = np.sort(X)
+
+     if n_repeat == 1:
+         y = f(X) + np.random.normal(0.0, noise, n_samples)
+     else:
+         y = np.zeros((n_samples, n_repeat))
+
+         for i in range(n_repeat):
+             y[:, i] = f(X) + np.random.normal(0.0, noise, n_samples)
+
+     X = X.reshape((n_samples, 1))
+
+     return X, y
+
+
+ def train_model(n_train, noise):
+     # Settings
+     n_repeat = 50  # Number of iterations for computing expectations
+     n_test = 1000  # Size of the test set
+     # n_train and noise are provided by the Gradio sliders
+     np.random.seed(0)
+
+     # Change this for exploring the bias-variance decomposition of other
+     # estimators. This should work well for estimators with high variance (e.g.,
+     # decision trees or KNN), but poorly for estimators with low variance (e.g.,
+     # linear models).
+     estimators = [
+         ("Tree", DecisionTreeRegressor()),
+         ("Bagging(Tree)", BaggingRegressor(DecisionTreeRegressor())),
+     ]
+
+     n_estimators = len(estimators)
+
+     X_train = []
+     y_train = []
+
+     for i in range(n_repeat):
+         X, y = generate(n_samples=n_train, noise=noise)
+         X_train.append(X)
+         y_train.append(y)
+
+     X_test, y_test = generate(n_samples=n_test, noise=noise, n_repeat=n_repeat)
+
+     fig = plt.figure(figsize=(10, 8))
+
+     out_str = ""
+     # Loop over estimators to compare
+     for n, (name, estimator) in enumerate(estimators):
+         # Compute predictions
+         y_predict = np.zeros((n_test, n_repeat))
+
+         for i in range(n_repeat):
+             estimator.fit(X_train[i], y_train[i])
+             y_predict[:, i] = estimator.predict(X_test)
+
+         # Bias^2 + Variance + Noise decomposition of the mean squared error
+         y_error = np.zeros(n_test)
+
+         for i in range(n_repeat):
+             for j in range(n_repeat):
+                 y_error += (y_test[:, j] - y_predict[:, i]) ** 2
+
+         y_error /= n_repeat * n_repeat
+
+         y_noise = np.var(y_test, axis=1)
+         y_bias = (f(X_test) - np.mean(y_predict, axis=1)) ** 2
+         y_var = np.var(y_predict, axis=1)
+
+         out_str += f"{name}: {np.mean(y_error):.4f} (error) = {np.mean(y_bias):.4f} (bias^2) + {np.mean(y_var):.4f} (var) + {np.mean(y_noise):.4f} (noise)\n"
+
+         # Plot figures
+         plt.subplot(2, n_estimators, n + 1)
+         plt.plot(X_test, f(X_test), "b", label="$f(x)$")
+         plt.plot(X_train[0], y_train[0], ".b", label="LS ~ $y = f(x)+noise$")
+
+         for i in range(n_repeat):
+             if i == 0:
+                 plt.plot(X_test, y_predict[:, i], "r", label=r"$\^y(x)$")
+             else:
+                 plt.plot(X_test, y_predict[:, i], "r", alpha=0.05)
+
+         plt.plot(X_test, np.mean(y_predict, axis=1), "c", label=r"$\mathbb{E}_{LS} \^y(x)$")
+
+         plt.xlim([-5, 5])
+         plt.title(name)
+
+         if n == n_estimators - 1:
+             plt.legend(loc=(1.1, 0.5))
+
+         plt.subplot(2, n_estimators, n_estimators + n + 1)
+         plt.plot(X_test, y_error, "r", label="$error(x)$")
+         plt.plot(X_test, y_bias, "b", label="$bias^2(x)$")
+         plt.plot(X_test, y_var, "g", label="$variance(x)$")
+         plt.plot(X_test, y_noise, "c", label="$noise(x)$")
+
+         plt.xlim([-5, 5])
+         plt.ylim([0, noise])
+
+         if n == n_estimators - 1:
+             plt.legend(loc=(1.1, 0.5))
+
+     plt.subplots_adjust(right=0.75)
+
+     return fig, out_str
+
+
+ title = "Single estimator versus bagging: bias-variance decomposition ⚖️"
+ description = "This example illustrates and compares the bias-variance decomposition of the expected mean squared error of a single estimator against a bagging ensemble."
+
+ # Gradio UI: the sliders retrain the estimators and refresh the plot and summary text
+ with gr.Blocks() as demo:
+     gr.Markdown(f"## {title}")
+     gr.Markdown(description)
+
+     num_samples = gr.Slider(minimum=50, maximum=200, step=50, value=50, label="Number of samples")
+     noise = gr.Slider(minimum=0.05, maximum=0.2, step=0.05, value=0.1, label="Noise")
+
+     with gr.Row():
+         with gr.Column(scale=2):
+             plot = gr.Plot()
+         with gr.Column(scale=1):
+             results = gr.Textbox(label="Results")
+
+     num_samples.change(fn=train_model, inputs=[num_samples, noise], outputs=[plot, results])
+     noise.change(fn=train_model, inputs=[num_samples, noise], outputs=[plot, results])
+
+ demo.launch(enable_queue=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ numpy
+ matplotlib
+ scikit-learn
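
For reference, below is a minimal standalone sketch (not part of this commit) of the decomposition that train_model reports: averaged over many realisations, the mean squared error roughly splits into bias^2 + variance + noise. The toy predictions and all constants here are illustrative assumptions, not anything app.py computes.

import numpy as np

# Minimal sketch (not part of this commit): check that the averaged squared error
# splits into bias^2 + variance + noise, the quantity train_model() reports.
rng = np.random.default_rng(0)

def f(x):
    return np.exp(-(x**2)) + 1.5 * np.exp(-((x - 2) ** 2))

n_repeat, n_test, sigma = 100, 200, 0.1                            # illustrative values
x = np.linspace(-5, 5, n_test)
y_test = f(x) + rng.normal(0.0, sigma, (n_repeat, n_test))         # noisy targets
y_pred = f(x) + 0.02 + rng.normal(0.0, 0.05, (n_repeat, n_test))   # toy predictions with a small bias and variance

# Average the squared error over all (target realisation, prediction) pairs,
# like the double loop over n_repeat in app.py.
y_error = ((y_test[:, None, :] - y_pred[None, :, :]) ** 2).mean(axis=(0, 1))

y_noise = y_test.var(axis=0)                 # irreducible noise
y_bias = (f(x) - y_pred.mean(axis=0)) ** 2   # squared bias
y_var = y_pred.var(axis=0)                   # variance of the predictions

print(f"{y_error.mean():.4f} (error) ≈ {(y_bias + y_var + y_noise).mean():.4f} (bias^2 + var + noise)")

Running it prints two nearly equal numbers, mirroring the per-estimator summary line that app.py writes into the Results textbox.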