Spaces:
Sleeping
Sleeping
Jayabalambika
committed on
Commit
•
3234b71
1
Parent(s):
11f99bb
Update app.py
Browse filesreview comments
app.py
CHANGED
@@ -11,6 +11,88 @@ from sklearn.datasets import load_diabetes
|
|
11 |
|
12 |
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
def load_dataset():
    """Load the diabetes regression dataset as pandas objects.

    Returns
    -------
    (X, y) : feature ``DataFrame`` and target ``Series``
        As returned by ``load_diabetes(return_X_y=True, as_frame=True)``.
    """
    features, target = load_diabetes(return_X_y=True, as_frame=True)
    return features, target
|
@@ -84,9 +166,18 @@ with gr.Blocks(title=title) as demo:
|
|
84 |
gr.Markdown(f"# {title}")
|
85 |
gr.Markdown(
|
86 |
"""
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
"""
|
91 |
|
92 |
)
|
@@ -113,4 +204,10 @@ with gr.Blocks(title=title) as demo:
|
|
113 |
|
114 |
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
demo.launch()
|
|
|
11 |
|
12 |
|
13 |
|
14 |
+
def load_dataset():
|
15 |
+
X, y = load_diabetes(return_X_y=True, as_frame=True)
|
16 |
+
return X,y
|
17 |
+
|
18 |
+
|
19 |
+
def aic_pipeline(X, y):
    """Fit a scaled LassoLarsIC model selecting alpha by the AIC criterion.

    Parameters
    ----------
    X, y : training features and target.

    Returns
    -------
    The fitted ``StandardScaler`` + ``LassoLarsIC(criterion="aic")`` pipeline.
    """
    pipeline = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic"))
    return pipeline.fit(X, y)
|
22 |
+
|
23 |
+
|
24 |
+
def zou_et_al_criterion_rescaling(criterion, n_samples, noise_variance):
    """Rescale the information criterion to follow the definition of Zou et al.

    Parameters
    ----------
    criterion : float or ndarray
        AIC/BIC value(s) as computed by scikit-learn.
    n_samples : int
        Number of training samples.
    noise_variance : float
        Estimated noise variance of the model.

    Returns
    -------
    The rescaled criterion: ``criterion - n * log(2*pi*sigma^2) - n``.
    """
    # Constant log-likelihood offset dropped by sklearn's definition.
    log_likelihood_offset = n_samples * np.log(2 * np.pi * noise_variance)
    return criterion - log_likelihood_offset - n_samples
|
27 |
+
|
28 |
+
|
29 |
+
def zou_et_all_aic(lasso_lars_ic):
    """Rescale the fitted AIC path and locate the selected alpha.

    NOTE(review): reads the module-level global ``n_samples`` — confirm it
    is defined before this is called; consider passing it as an argument.

    Parameters
    ----------
    lasso_lars_ic : pipeline whose last step is a fitted ``LassoLarsIC``.

    Returns
    -------
    (index_alpha_path_aic, aic_criterion)
        Index of the selected alpha on the alpha path, and the rescaled
        AIC criterion values.
    """
    estimator = lasso_lars_ic[-1]
    aic_criterion = zou_et_al_criterion_rescaling(
        estimator.criterion_,
        n_samples,
        estimator.noise_variance_,
    )
    # Position on the alpha path where the selected alpha_ lives.
    index_alpha_path_aic = np.flatnonzero(estimator.alphas_ == estimator.alpha_)[0]
    return index_alpha_path_aic, aic_criterion
|
41 |
+
|
42 |
+
def zou_et_all_bic(lasso_lars_ic):
    """Refit with the BIC criterion, rescale it, and locate the selected alpha.

    Side effect: mutates ``lasso_lars_ic`` in place (``set_params`` + refit).
    NOTE(review): reads the module-level globals ``X``, ``y`` and
    ``n_samples`` — confirm they are defined before this is called.

    Parameters
    ----------
    lasso_lars_ic : pipeline whose last step is a ``LassoLarsIC``.

    Returns
    -------
    (index_alpha_path_bic, bic_criterion)
        Index of the selected alpha on the alpha path, and the rescaled
        BIC criterion values.
    """
    # Switch the criterion to BIC and refit on the (global) training data.
    lasso_lars_ic.set_params(lassolarsic__criterion="bic").fit(X, y)
    estimator = lasso_lars_ic[-1]
    bic_criterion = zou_et_al_criterion_rescaling(
        estimator.criterion_,
        n_samples,
        estimator.noise_variance_,
    )
    index_alpha_path_bic = np.flatnonzero(estimator.alphas_ == estimator.alpha_)[0]
    return index_alpha_path_bic, bic_criterion
|
55 |
+
|
56 |
+
def fn_assert_true():
    """Verify that AIC and BIC select the same point on the alpha path.

    NOTE(review): relies on the module-level globals ``index_alpha_path_aic``
    and ``index_alpha_path_bic`` — consider passing them as arguments.

    Raises
    ------
    AssertionError
        If the two selected indices differ.
    """
    # Explicit raise instead of a bare `assert`, which is silently stripped
    # when Python runs with optimizations enabled (-O).
    if index_alpha_path_bic != index_alpha_path_aic:
        raise AssertionError(
            "AIC and BIC selected different alpha-path indices: "
            f"{index_alpha_path_aic!r} != {index_alpha_path_bic!r}"
        )
|
58 |
+
|
59 |
+
|
60 |
+
|
61 |
+
def visualize_input_data():
    """Plot the AIC/BIC criterion curves and mark the selected alpha.

    NOTE(review): reads the module-level globals ``aic_criterion``,
    ``bic_criterion`` and ``index_alpha_path_bic`` — confirm they are set
    before this is called.

    Returns
    -------
    The matplotlib figure containing the plot.
    """
    fig = plt.figure(1, facecolor="w", figsize=(5, 5))
    # Draw both criterion curves in the same order as the legend.
    for values, color, label in (
        (aic_criterion, "tab:blue", "AIC criterion"),
        (bic_criterion, "tab:orange", "BIC criterion"),
    ):
        plt.plot(values, color=color, marker="o", label=label)
    # Vertical dashed marker at the alpha selected by BIC.
    plt.vlines(
        index_alpha_path_bic,
        aic_criterion.min(),
        aic_criterion.max(),
        color="black",
        linestyle="--",
        label="Selected alpha",
    )
    plt.legend()
    plt.ylabel("Information criterion")
    plt.xlabel("Lasso model sequence")
    _ = plt.title("Lasso model selection via AIC and BIC")
    return fig
|
80 |
+
|
81 |
+
title = " Lasso model selection via information criteria"
|
82 |
+
|
83 |
+
import gradio as gr
|
84 |
+
import matplotlib.pyplot as plt
|
85 |
+
# from skops import hub_utils
|
86 |
+
import time
|
87 |
+
import pickle
|
88 |
+
import numpy as np
|
89 |
+
from sklearn.preprocessing import StandardScaler
|
90 |
+
from sklearn.linear_model import LassoLarsIC
|
91 |
+
from sklearn.pipeline import make_pipeline
|
92 |
+
from sklearn.datasets import load_diabetes
|
93 |
+
|
94 |
+
|
95 |
+
|
96 |
def load_dataset():
|
97 |
X, y = load_diabetes(return_X_y=True, as_frame=True)
|
98 |
return X,y
|
|
|
166 |
gr.Markdown(f"# {title}")
|
167 |
gr.Markdown(
|
168 |
"""
|
169 |
+
# Probabilistic model selection using Information Criterion.
|
170 |
+
These methods are useful in statistics because they don't require a held-out test set (cross-validation set).
|
171 |
+
|
172 |
+
AIC and BIC are two ways of scoring a model based on its log-likelihood and complexity.
|
173 |
+
|
174 |
+
It is important to note that the optimization to find alpha with LassoLarsIC relies on the AIC or BIC criteria
|
175 |
+
that are computed in-sample, thus on the training set directly.
|
176 |
+
This approach differs from the cross-validation procedure.
|
177 |
+
|
178 |
+
Also, one of the drawbacks of these kinds of probabilistic models is that the same general statistic cannot be used across models.
|
179 |
+
Instead, a careful metric must be devised for each of the models separately.
|
180 |
+
The uncertainty of the model is not taken into account.
|
181 |
"""
|
182 |
|
183 |
)
|
|
|
204 |
|
205 |
|
206 |
|
207 |
+
demo.launch()
|
208 |
+
|
209 |
+
|
210 |
+
|
211 |
+
|
212 |
+
|
213 |
demo.launch()
|