Spaces:
Sleeping
Sleeping
Jayabalambika
committed on
Commit
•
3234b71
1
Parent(s):
11f99bb
Update app.py
Browse filesreview comments
app.py
CHANGED
@@ -11,6 +11,88 @@ from sklearn.datasets import load_diabetes
|
|
11 |
|
12 |
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
def load_dataset():
    """Load the diabetes regression dataset as pandas objects.

    Returns
    -------
    (X, y) : feature ``DataFrame`` and target ``Series``
        As returned by ``load_diabetes(return_X_y=True, as_frame=True)``.
    """
    features, target = load_diabetes(return_X_y=True, as_frame=True)
    return features, target
|
@@ -84,9 +166,18 @@ with gr.Blocks(title=title) as demo:
|
|
84 |
gr.Markdown(f"# {title}")
|
85 |
gr.Markdown(
|
86 |
"""
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
"""
|
91 |
|
92 |
)
|
@@ -113,4 +204,10 @@ with gr.Blocks(title=title) as demo:
|
|
113 |
|
114 |
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
demo.launch()
|
|
|
11 |
|
12 |
|
13 |
|
14 |
+
def load_dataset():
|
15 |
+
X, y = load_diabetes(return_X_y=True, as_frame=True)
|
16 |
+
return X,y
|
17 |
+
|
18 |
+
|
19 |
+
def aic_pipeline(X, y):
    """Fit a scaled LassoLarsIC model selecting alpha by the AIC criterion.

    Parameters
    ----------
    X, y : training features and target.

    Returns
    -------
    The fitted ``StandardScaler`` + ``LassoLarsIC(criterion="aic")`` pipeline.
    """
    pipeline = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic"))
    return pipeline.fit(X, y)
|
22 |
+
|
23 |
+
|
24 |
+
def zou_et_al_criterion_rescaling(criterion, n_samples, noise_variance):
    """Rescale the information criterion to follow the definition of Zou et al.

    Parameters
    ----------
    criterion : float or ndarray
        AIC/BIC value(s) as computed by scikit-learn.
    n_samples : int
        Number of training samples.
    noise_variance : float
        Estimated noise variance of the model.

    Returns
    -------
    The rescaled criterion: ``criterion - n * log(2*pi*sigma^2) - n``.
    """
    # Constant log-likelihood offset dropped by sklearn's definition.
    log_likelihood_offset = n_samples * np.log(2 * np.pi * noise_variance)
    return criterion - log_likelihood_offset - n_samples
|
27 |
+
|
28 |
+
|
29 |
+
def zou_et_all_aic(lasso_lars_ic):
    """Rescale the fitted AIC path and locate the selected alpha.

    NOTE(review): reads the module-level global ``n_samples`` — confirm it
    is defined before this is called; consider passing it as an argument.

    Parameters
    ----------
    lasso_lars_ic : pipeline whose last step is a fitted ``LassoLarsIC``.

    Returns
    -------
    (index_alpha_path_aic, aic_criterion)
        Index of the selected alpha on the alpha path, and the rescaled
        AIC criterion values.
    """
    estimator = lasso_lars_ic[-1]
    aic_criterion = zou_et_al_criterion_rescaling(
        estimator.criterion_,
        n_samples,
        estimator.noise_variance_,
    )
    # Position on the alpha path where the selected alpha_ lives.
    index_alpha_path_aic = np.flatnonzero(estimator.alphas_ == estimator.alpha_)[0]
    return index_alpha_path_aic, aic_criterion
|
41 |
+
|
42 |
+
def zou_et_all_bic(lasso_lars_ic):
    """Refit with the BIC criterion, rescale it, and locate the selected alpha.

    Side effect: mutates ``lasso_lars_ic`` in place (``set_params`` + refit).
    NOTE(review): reads the module-level globals ``X``, ``y`` and
    ``n_samples`` — confirm they are defined before this is called.

    Parameters
    ----------
    lasso_lars_ic : pipeline whose last step is a ``LassoLarsIC``.

    Returns
    -------
    (index_alpha_path_bic, bic_criterion)
        Index of the selected alpha on the alpha path, and the rescaled
        BIC criterion values.
    """
    # Switch the criterion to BIC and refit on the (global) training data.
    lasso_lars_ic.set_params(lassolarsic__criterion="bic").fit(X, y)
    estimator = lasso_lars_ic[-1]
    bic_criterion = zou_et_al_criterion_rescaling(
        estimator.criterion_,
        n_samples,
        estimator.noise_variance_,
    )
    index_alpha_path_bic = np.flatnonzero(estimator.alphas_ == estimator.alpha_)[0]
    return index_alpha_path_bic, bic_criterion
|
55 |
+
|
56 |
+
def fn_assert_true():
    """Verify that AIC and BIC select the same point on the alpha path.

    NOTE(review): relies on the module-level globals ``index_alpha_path_aic``
    and ``index_alpha_path_bic`` — consider passing them as arguments.

    Raises
    ------
    AssertionError
        If the two selected indices differ.
    """
    # Explicit raise instead of a bare `assert`, which is silently stripped
    # when Python runs with optimizations enabled (-O).
    if index_alpha_path_bic != index_alpha_path_aic:
        raise AssertionError(
            "AIC and BIC selected different alpha-path indices: "
            f"{index_alpha_path_aic!r} != {index_alpha_path_bic!r}"
        )
|
58 |
+
|
59 |
+
|
60 |
+
|
61 |
+
def visualize_input_data():
    """Plot the AIC/BIC criterion curves and mark the selected alpha.

    NOTE(review): reads the module-level globals ``aic_criterion``,
    ``bic_criterion`` and ``index_alpha_path_bic`` — confirm they are set
    before this is called.

    Returns
    -------
    The matplotlib figure containing the plot.
    """
    fig = plt.figure(1, facecolor="w", figsize=(5, 5))
    # Draw both criterion curves in the same order as the legend.
    for values, color, label in (
        (aic_criterion, "tab:blue", "AIC criterion"),
        (bic_criterion, "tab:orange", "BIC criterion"),
    ):
        plt.plot(values, color=color, marker="o", label=label)
    # Vertical dashed marker at the alpha selected by BIC.
    plt.vlines(
        index_alpha_path_bic,
        aic_criterion.min(),
        aic_criterion.max(),
        color="black",
        linestyle="--",
        label="Selected alpha",
    )
    plt.legend()
    plt.ylabel("Information criterion")
    plt.xlabel("Lasso model sequence")
    _ = plt.title("Lasso model selection via AIC and BIC")
    return fig
|
80 |
+
|
81 |
+
title = " Lasso model selection via information criteria"
|
82 |
+
|
83 |
+
import gradio as gr
|
84 |
+
import matplotlib.pyplot as plt
|
85 |
+
# from skops import hub_utils
|
86 |
+
import time
|
87 |
+
import pickle
|
88 |
+
import numpy as np
|
89 |
+
from sklearn.preprocessing import StandardScaler
|
90 |
+
from sklearn.linear_model import LassoLarsIC
|
91 |
+
from sklearn.pipeline import make_pipeline
|
92 |
+
from sklearn.datasets import load_diabetes
|
93 |
+
|
94 |
+
|
95 |
+
|
96 |
def load_dataset():
|
97 |
X, y = load_diabetes(return_X_y=True, as_frame=True)
|
98 |
return X,y
|
|
|
166 |
gr.Markdown(f"# {title}")
|
167 |
gr.Markdown(
|
168 |
"""
|
169 |
+
# Probabilistic model selection using Information Criterion.
|
170 |
+
These methods are useful in statistics because they don't require a held-out test set (cross-validation set).
|
171 |
+
|
172 |
+
AIC and BIC are two ways of scoring a model based on its log-likelihood and complexity.
|
173 |
+
|
174 |
+
It is important to note that the optimization to find alpha with LassoLarsIC relies on the AIC or BIC criteria
|
175 |
+
that are computed in-sample, thus on the training set directly.
|
176 |
+
This approach differs from the cross-validation procedure.
|
177 |
+
|
178 |
+
Also, one of the drawbacks of these kinds of probabilistic models is that the same general statistic cannot be used across models.
|
179 |
+
Instead, a careful metric must be devised for each of the models separately.
|
180 |
+
The uncertainty of the model is not taken into account.
|
181 |
"""
|
182 |
|
183 |
)
|
|
|
204 |
|
205 |
|
206 |
|
207 |
+
demo.launch()
|
208 |
+
|
209 |
+
|
210 |
+
|
211 |
+
|
212 |
+
|
213 |
demo.launch()
|