|
import gradio as gr |
|
|
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
from sklearn import datasets, linear_model |
|
from sklearn.metrics import mean_squared_error, r2_score |
|
|
|
import matplotlib |
|
matplotlib.use('agg') |
|
|
|
# Figure size handed to matplotlib (``figsize=``) when the regression plot
# is created.
FIGSIZE = (10, 10)

# Display names offered in the "Feature" drop-down. ``train_model`` derives
# the dataset column from a name's position in this list, so the order of
# the entries matters.
feature_names = [
    "Age",
    "Body-Mass Index (BMI)",
    "Blood Pressure",
    "Total serum Cholesterol",
    "Low-Density Lipoproteins (LDL)",
    "High-Density Lipoproteins (HDL)",
    "Total cholesterol / HDL",
    "log(Serum Triglycerides Level) (possibly)",
    "Blood Sugar Level",
]
|
|
|
def create_dataset(feature_id=2):
    """Load the diabetes data and split a single feature into train/test parts.

    Args:
        feature_id: Column index of the feature to keep (defaults to 2).

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``. The final 20 samples
        form the test part and everything before them the training part.
        The X arrays hold one column each, i.e. they are 2-D with shape
        ``(n_samples, 1)``.
    """
    n_test = 20
    features, target = datasets.load_diabetes(return_X_y=True)

    # Pick one column but keep the array 2-D, which is the layout the
    # estimator is later fitted on.
    column = features[:, np.newaxis, feature_id]

    return column[:-n_test], column[-n_test:], target[:-n_test], target[-n_test:]
|
|
|
def train_model(input_data):
    """Fit a one-feature linear regression on the diabetes data and plot it.

    Args:
        input_data: Display name of the feature to regress on; expected to
            be one of the entries of ``feature_names``. The lowercase
            ``'age'`` spelling accepted previously still works.

    Returns:
        A 4-tuple ``(fig, coef, mse, r2)``: the matplotlib figure with the
        test points and the fitted line, the fitted ``coef_`` array, and the
        mean squared error and R2 score computed on the test split.

    Raises:
        ValueError: If ``input_data`` is not a known feature name.
    """
    # Per the scikit-learn docs the dataset columns are ordered
    # age, sex, bmi, ...; ``feature_names`` has no entry for the second
    # column, so every name after "Age" lives one column to the right of
    # its list position. The age check is case-insensitive because the
    # drop-down sends "Age", which the old ``== 'age'`` test never matched.
    if isinstance(input_data, str) and input_data.casefold() == "age":
        feature_id = 0
    else:
        try:
            feature_id = feature_names.index(input_data) + 1
        except ValueError:
            raise ValueError(
                f"Unknown feature {input_data!r}; expected one of {feature_names}"
            ) from None

    diabetes_X_train, diabetes_X_test, diabetes_y_train, diabetes_y_test = create_dataset(feature_id)

    regr = linear_model.LinearRegression()
    regr.fit(diabetes_X_train, diabetes_y_train)

    # Evaluate on the held-out samples only.
    diabetes_y_pred = regr.predict(diabetes_X_test)
    mse = mean_squared_error(diabetes_y_test, diabetes_y_pred)
    r2 = r2_score(diabetes_y_test, diabetes_y_pred)

    # Draw through the Axes object instead of pyplot's implicit "current
    # figure", which is global state shared by concurrent callbacks.
    fig, ax = plt.subplots(figsize=FIGSIZE)
    ax.scatter(diabetes_X_test, diabetes_y_test, color="black")
    ax.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)

    ax.set_xlabel(input_data, fontsize=18)
    ax.set_ylabel("Disease progression", fontsize=18)

    # Hide the tick marks on both axes.
    ax.set_xticks(())
    ax.set_yticks(())

    # pyplot keeps every figure it creates alive until it is closed; drop it
    # from pyplot's registry so repeated selections do not accumulate
    # figures. The Figure object itself stays usable for rendering.
    plt.close(fig)

    return fig, regr.coef_, mse, r2
|
|
|
# Heading and introductory text rendered as Markdown at the top of the demo.
# NOTE(review): the trailing "π" in the title may be a mis-encoded emoji;
# confirm the intended character.
title = "Linear Regression Example π"
description = """The example shows how linear regression attempts to draw a straight line that will best minimize the residual sum of squares between the observed responses in the dataset.

The diabetes dataset contains baseline variables (features), age, sex, body mass index, average blood pressure, and six blood serum measurements that were obtained for 442 diabetes patients.
The predictive variable is a quantitative measure of the disease progression one year after the baseline.

When selecting a feature from the drop-down menu, a linear regression model is trained for the specific feature and the predictive variable.
The figure shows a scatter plot of the test set as well as the linear model (line).
The mean square error and R2 scores are calculated using the test set and they are printed, along with the regression coefficient of the model.
"""
|
# Build the page: heading and description on top, then the plot next to a
# column holding the feature selector and the three result boxes.
with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    with gr.Column():
        with gr.Row():
            plot = gr.Plot()
            with gr.Column():
                # The default must be one of ``choices``; the former
                # "Body-Mass Index" did not match any entry.
                input_data = gr.Dropdown(
                    choices=feature_names,
                    label="Feature",
                    value="Body-Mass Index (BMI)",
                )
                coef = gr.Textbox(label="Coefficients")
                mse = gr.Textbox(label="Mean Squared Error (MSE)")
                r2 = gr.Textbox(label="R2 score")

    # Retrain and redraw whenever another feature is selected.
    input_data.change(
        fn=train_model,
        inputs=[input_data],
        outputs=[plot, coef, mse, r2],
        queue=False,
    )

# ``launch(enable_queue=True)`` is deprecated and no longer accepted by
# newer Gradio versions; enabling the queue through ``queue()`` is the
# supported equivalent.
demo.queue()
demo.launch()
|
|