Jayabalambika committed on
Commit
3234b71
1 Parent(s): 11f99bb

Update app.py


review comments

Files changed (1): app.py +100 -3
app.py CHANGED
@@ -11,6 +11,88 @@ from sklearn.datasets import load_diabetes
 
 
 
+def load_dataset():
+    X, y = load_diabetes(return_X_y=True, as_frame=True)
+    return X,y
+
+
+def aic_pipeline(X,y):
+    lasso_lars_ic = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y)
+    return lasso_lars_ic
+
+
+def zou_et_al_criterion_rescaling(criterion, n_samples, noise_variance):
+    """Rescale the information criterion to follow the definition of Zou et al."""
+    return criterion - n_samples * np.log(2 * np.pi * noise_variance) - n_samples
+
+
+def zou_et_all_aic(lasso_lars_ic):
+    aic_criterion = zou_et_al_criterion_rescaling(
+        lasso_lars_ic[-1].criterion_,
+        n_samples,
+        lasso_lars_ic[-1].noise_variance_,
+    )
+
+    index_alpha_path_aic = np.flatnonzero(
+        lasso_lars_ic[-1].alphas_ == lasso_lars_ic[-1].alpha_
+    )[0]
+
+    return index_alpha_path_aic, aic_criterion
+
+def zou_et_all_bic(lasso_lars_ic):
+    lasso_lars_ic.set_params(lassolarsic__criterion="bic").fit(X, y)
+    bic_criterion = zou_et_al_criterion_rescaling(
+        lasso_lars_ic[-1].criterion_,
+        n_samples,
+        lasso_lars_ic[-1].noise_variance_,
+    )
+
+    index_alpha_path_bic = np.flatnonzero(
+        lasso_lars_ic[-1].alphas_ == lasso_lars_ic[-1].alpha_
+    )[0]
+
+    return index_alpha_path_bic, bic_criterion
+
+def fn_assert_true():
+    assert index_alpha_path_bic == index_alpha_path_aic
+
+
+
+def visualize_input_data():
+    fig = plt.figure(1, facecolor="w", figsize=(5, 5))
+    plt.plot(aic_criterion, color="tab:blue", marker="o", label="AIC criterion")
+    plt.plot(bic_criterion, color="tab:orange", marker="o", label="BIC criterion")
+    plt.vlines(
+        index_alpha_path_bic,
+        aic_criterion.min(),
+        aic_criterion.max(),
+        color="black",
+        linestyle="--",
+        label="Selected alpha",
+    )
+    plt.legend()
+    plt.ylabel("Information criterion")
+    plt.xlabel("Lasso model sequence")
+    _ = plt.title("Lasso model selection via AIC and BIC")
+
+
+    return fig
+
+title = " Lasso model selection via information criteria"
+
+import gradio as gr
+import matplotlib.pyplot as plt
+# from skops import hub_utils
+import time
+import pickle
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+from sklearn.linear_model import LassoLarsIC
+from sklearn.pipeline import make_pipeline
+from sklearn.datasets import load_diabetes
+
+
+
 def load_dataset():
     X, y = load_diabetes(return_X_y=True, as_frame=True)
     return X,y
@@ -84,9 +166,18 @@ with gr.Blocks(title=title) as demo:
     gr.Markdown(f"# {title}")
     gr.Markdown(
         """
-    A LassoLarsIC estimator is fit on a diabetes dataset and the AIC and the BIC criteria are used to select the best model.
-    It is important to note that the optimization to find alpha with LassoLarsIC relies on the AIC or BIC criteria that are computed in-sample,
-    thus on the training set directly. This approach differs from the cross-validation procedure
+    # Probabilistic model selection using information criteria
+    These methods are useful because they do not require a held-out test set (a cross-validation set).
+
+    AIC and BIC are two ways of scoring a model based on its log-likelihood and its complexity.
+
+    It is important to note that the optimization to find alpha with LassoLarsIC relies on the AIC or BIC criteria
+    that are computed in-sample, thus on the training set directly.
+    This approach differs from the cross-validation procedure.
+
+    One drawback of these probabilistic methods is that the same general statistic cannot be used across models;
+    instead, a metric has to be devised carefully for each model separately.
+    They also do not take the uncertainty of the model into account.
         """
 
     )
@@ -113,4 +204,10 @@ with gr.Blocks(title=title) as demo:
 
 
 
+demo.launch()
+
+
+
+
+
 demo.launch()
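The functions added in this commit lean on module-level state that the diff itself never defines: `zou_et_all_aic` and `zou_et_all_bic` read a global `n_samples` (and `X`, `y`), while `fn_assert_true` and `visualize_input_data` read the globals produced by the two selection functions. A minimal driver along the following lines would wire everything together; this is a sketch under that assumption, not code from the commit:

```python
# Hypothetical glue code, assuming the functions defined in app.py are in scope.
X, y = load_dataset()
n_samples = X.shape[0]  # read as a global by zou_et_all_aic / zou_et_all_bic

lasso_lars_ic = aic_pipeline(X, y)  # pipeline fitted with criterion="aic"

# Rescale both criteria to the Zou et al. definition and locate the
# selected alpha on the regularization path.
index_alpha_path_aic, aic_criterion = zou_et_all_aic(lasso_lars_ic)
index_alpha_path_bic, bic_criterion = zou_et_all_bic(lasso_lars_ic)

fn_assert_true()  # AIC and BIC pick the same point on this dataset

fig = visualize_input_data()  # plot both criteria and mark the selected alpha
```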
 
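The new description characterizes AIC and BIC as scores built from log-likelihood and complexity. For reference, with maximized likelihood L̂, k free parameters, and n samples, the standard definitions are:

```latex
\mathrm{AIC} = 2k - 2\ln\hat{L}, \qquad \mathrm{BIC} = k\ln n - 2\ln\hat{L}
```

Lower is better for both; BIC penalizes complexity more heavily than AIC whenever ln n > 2, i.e. for more than about seven samples.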
 
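`zou_et_al_criterion_rescaling` converts the criterion that scikit-learn's `LassoLarsIC` reports into the form used by Zou et al. Reading the subtraction directly off the function, with n the number of samples and σ̂² the estimated noise variance:

```latex
\mathrm{criterion}_{\mathrm{Zou}} = \mathrm{criterion}_{\mathrm{sklearn}} - n\ln(2\pi\hat{\sigma}^2) - n
```

The shift is constant for a fixed dataset and noise estimate, so it changes the reported values but not which alpha minimizes the criterion.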
 
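The description stresses that `LassoLarsIC` picks alpha from in-sample statistics alone, unlike cross-validation. For contrast, a cross-validated selection on the same dataset could look like the sketch below; the 20-fold choice is arbitrary and the snippet is illustrative, not part of the app:

```python
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LassoCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_diabetes(return_X_y=True, as_frame=True)

# LassoCV selects alpha by averaging held-out error across folds,
# rather than by an in-sample information criterion.
model = make_pipeline(StandardScaler(), LassoCV(cv=20)).fit(X, y)
print(f"alpha selected by 20-fold CV: {model[-1].alpha_:.6f}")
```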