File size: 163,512 Bytes
9864402 |
1 |
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.9","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"## Decision Trees tutorial & improving hosting with skops 🌲\n\nIn this notebook I will walk you through decision trees and how to inspect them, and we will later improve model hosting using [skops](https://skops.readthedocs.io/en/stable/). ","metadata":{}},{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current 
session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","_kg_hide-input":true,"_kg_hide-output":true,"execution":{"iopub.status.busy":"2022-12-01T13:21:07.411748Z","iopub.execute_input":"2022-12-01T13:21:07.412350Z","iopub.status.idle":"2022-12-01T13:21:07.419860Z","shell.execute_reply.started":"2022-12-01T13:21:07.412261Z","shell.execute_reply":"2022-12-01T13:21:07.418325Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"code","source":"# Use the %pip magic (not the !pip shell escape) so skops is installed into the running kernel's environment\n%pip install skops","metadata":{"_kg_hide-output":true,"execution":{"iopub.status.busy":"2022-12-01T13:21:09.851317Z","iopub.execute_input":"2022-12-01T13:21:09.851890Z","iopub.status.idle":"2022-12-01T13:21:15.803438Z","shell.execute_reply.started":"2022-12-01T13:21:09.851859Z","shell.execute_reply":"2022-12-01T13:21:15.802081Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stdout","text":"Requirement already satisfied: skops in /opt/conda/lib/python3.7/site-packages (0.3.0)\nRequirement already satisfied: tabulate>=0.8.8 in /opt/conda/lib/python3.7/site-packages (from skops) (0.8.8)\nRequirement already satisfied: typing-extensions>=3.7 in /opt/conda/lib/python3.7/site-packages (from skops) (3.7.4.3)\nRequirement already satisfied: huggingface-hub>=0.10.1 in /opt/conda/lib/python3.7/site-packages (from skops) (0.11.1)\nRequirement already satisfied: scikit-learn>=0.24 in /opt/conda/lib/python3.7/site-packages (from skops) (0.24.1)\nRequirement already satisfied: packaging>=20.9 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub>=0.10.1->skops) (21.3)\nRequirement already satisfied: tqdm in /opt/conda/lib/python3.7/site-packages (from huggingface-hub>=0.10.1->skops) (4.55.1)\nRequirement already satisfied: pyyaml>=5.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub>=0.10.1->skops) (5.3.1)\nRequirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub>=0.10.1->skops) 
(2.25.1)\nRequirement already satisfied: importlib-metadata in /opt/conda/lib/python3.7/site-packages (from huggingface-hub>=0.10.1->skops) (3.3.0)\nRequirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub>=0.10.1->skops) (3.0.12)\nRequirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /opt/conda/lib/python3.7/site-packages (from packaging>=20.9->huggingface-hub>=0.10.1->skops) (2.4.7)\nRequirement already satisfied: scipy>=0.19.1 in /opt/conda/lib/python3.7/site-packages (from scikit-learn>=0.24->skops) (1.5.4)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /opt/conda/lib/python3.7/site-packages (from scikit-learn>=0.24->skops) (2.1.0)\nRequirement already satisfied: joblib>=0.11 in /opt/conda/lib/python3.7/site-packages (from scikit-learn>=0.24->skops) (1.0.0)\nRequirement already satisfied: numpy>=1.13.3 in /opt/conda/lib/python3.7/site-packages (from scikit-learn>=0.24->skops) (1.19.5)\nRequirement already satisfied: zipp>=0.5 in /opt/conda/lib/python3.7/site-packages (from importlib-metadata->huggingface-hub>=0.10.1->skops) (3.4.0)\nRequirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub>=0.10.1->skops) (1.26.2)\nRequirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub>=0.10.1->skops) (2020.12.5)\nRequirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub>=0.10.1->skops) (2.10)\nRequirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub>=0.10.1->skops) (3.0.4)\n\u001b[33mWARNING: You are using pip version 21.0.1; however, version 22.3.1 is available.\nYou should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.\u001b[0m\n","output_type":"stream"}]},{"cell_type":"markdown","source":"We will use breast 
cancer dataset from sklearn datasets. We will load the dataset and split. ","metadata":{}},{"cell_type":"code","source":"from sklearn.datasets import load_breast_cancer\nfrom sklearn.model_selection import train_test_split","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:21:16.196274Z","iopub.execute_input":"2022-12-01T13:21:16.196592Z","iopub.status.idle":"2022-12-01T13:21:16.523656Z","shell.execute_reply.started":"2022-12-01T13:21:16.196567Z","shell.execute_reply":"2022-12-01T13:21:16.522085Z"},"trusted":true},"execution_count":3,"outputs":[]},{"cell_type":"code","source":"cancer = load_breast_cancer()\n# Pass feature_names directly: wrapping it in a list makes pandas build MultiIndex columns,\n# so every label becomes a 1-tuple and data['mean radius'] returns a DataFrame instead of a Series.\ndata = pd.DataFrame(cancer.data, columns=cancer.feature_names)\ndata.head()","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:21:16.668054Z","iopub.execute_input":"2022-12-01T13:21:16.668383Z","iopub.status.idle":"2022-12-01T13:21:16.719596Z","shell.execute_reply.started":"2022-12-01T13:21:16.668356Z","shell.execute_reply":"2022-12-01T13:21:16.717624Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":" mean radius mean texture mean perimeter mean area mean smoothness \\\n0 17.99 10.38 122.80 1001.0 0.11840 \n1 20.57 17.77 132.90 1326.0 0.08474 \n2 19.69 21.25 130.00 1203.0 0.10960 \n3 11.42 20.38 77.58 386.1 0.14250 \n4 20.29 14.34 135.10 1297.0 0.10030 \n\n mean compactness mean concavity mean concave points mean symmetry \\\n0 0.27760 0.3001 0.14710 0.2419 \n1 0.07864 0.0869 0.07017 0.1812 \n2 0.15990 0.1974 0.12790 0.2069 \n3 0.28390 0.2414 0.10520 0.2597 \n4 0.13280 0.1980 0.10430 0.1809 \n\n mean fractal dimension ... worst radius worst texture worst perimeter \\\n0 0.07871 ... 25.38 17.33 184.60 \n1 0.05667 ... 24.99 23.41 158.80 \n2 0.05999 ... 23.57 25.53 152.50 \n3 0.09744 ... 14.91 26.50 98.87 \n4 0.05883 ... 
22.54 16.67 152.20 \n\n worst area worst smoothness worst compactness worst concavity \\\n0 2019.0 0.1622 0.6656 0.7119 \n1 1956.0 0.1238 0.1866 0.2416 \n2 1709.0 0.1444 0.4245 0.4504 \n3 567.7 0.2098 0.8663 0.6869 \n4 1575.0 0.1374 0.2050 0.4000 \n\n worst concave points worst symmetry worst fractal dimension \n0 0.2654 0.4601 0.11890 \n1 0.1860 0.2750 0.08902 \n2 0.2430 0.3613 0.08758 \n3 0.2575 0.6638 0.17300 \n4 0.1625 0.2364 0.07678 \n\n[5 rows x 30 columns]","text/html":"<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead tr th {\n text-align: left;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr>\n <th></th>\n <th>mean radius</th>\n <th>mean texture</th>\n <th>mean perimeter</th>\n <th>mean area</th>\n <th>mean smoothness</th>\n <th>mean compactness</th>\n <th>mean concavity</th>\n <th>mean concave points</th>\n <th>mean symmetry</th>\n <th>mean fractal dimension</th>\n <th>...</th>\n <th>worst radius</th>\n <th>worst texture</th>\n <th>worst perimeter</th>\n <th>worst area</th>\n <th>worst smoothness</th>\n <th>worst compactness</th>\n <th>worst concavity</th>\n <th>worst concave points</th>\n <th>worst symmetry</th>\n <th>worst fractal dimension</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>17.99</td>\n <td>10.38</td>\n <td>122.80</td>\n <td>1001.0</td>\n <td>0.11840</td>\n <td>0.27760</td>\n <td>0.3001</td>\n <td>0.14710</td>\n <td>0.2419</td>\n <td>0.07871</td>\n <td>...</td>\n <td>25.38</td>\n <td>17.33</td>\n <td>184.60</td>\n <td>2019.0</td>\n <td>0.1622</td>\n <td>0.6656</td>\n <td>0.7119</td>\n <td>0.2654</td>\n <td>0.4601</td>\n <td>0.11890</td>\n </tr>\n <tr>\n <th>1</th>\n <td>20.57</td>\n <td>17.77</td>\n <td>132.90</td>\n <td>1326.0</td>\n <td>0.08474</td>\n <td>0.07864</td>\n <td>0.0869</td>\n <td>0.07017</td>\n <td>0.1812</td>\n <td>0.05667</td>\n <td>...</td>\n <td>24.99</td>\n 
<td>23.41</td>\n <td>158.80</td>\n <td>1956.0</td>\n <td>0.1238</td>\n <td>0.1866</td>\n <td>0.2416</td>\n <td>0.1860</td>\n <td>0.2750</td>\n <td>0.08902</td>\n </tr>\n <tr>\n <th>2</th>\n <td>19.69</td>\n <td>21.25</td>\n <td>130.00</td>\n <td>1203.0</td>\n <td>0.10960</td>\n <td>0.15990</td>\n <td>0.1974</td>\n <td>0.12790</td>\n <td>0.2069</td>\n <td>0.05999</td>\n <td>...</td>\n <td>23.57</td>\n <td>25.53</td>\n <td>152.50</td>\n <td>1709.0</td>\n <td>0.1444</td>\n <td>0.4245</td>\n <td>0.4504</td>\n <td>0.2430</td>\n <td>0.3613</td>\n <td>0.08758</td>\n </tr>\n <tr>\n <th>3</th>\n <td>11.42</td>\n <td>20.38</td>\n <td>77.58</td>\n <td>386.1</td>\n <td>0.14250</td>\n <td>0.28390</td>\n <td>0.2414</td>\n <td>0.10520</td>\n <td>0.2597</td>\n <td>0.09744</td>\n <td>...</td>\n <td>14.91</td>\n <td>26.50</td>\n <td>98.87</td>\n <td>567.7</td>\n <td>0.2098</td>\n <td>0.8663</td>\n <td>0.6869</td>\n <td>0.2575</td>\n <td>0.6638</td>\n <td>0.17300</td>\n </tr>\n <tr>\n <th>4</th>\n <td>20.29</td>\n <td>14.34</td>\n <td>135.10</td>\n <td>1297.0</td>\n <td>0.10030</td>\n <td>0.13280</td>\n <td>0.1980</td>\n <td>0.10430</td>\n <td>0.1809</td>\n <td>0.05883</td>\n <td>...</td>\n <td>22.54</td>\n <td>16.67</td>\n <td>152.20</td>\n <td>1575.0</td>\n <td>0.1374</td>\n <td>0.2050</td>\n <td>0.4000</td>\n <td>0.1625</td>\n <td>0.2364</td>\n <td>0.07678</td>\n </tr>\n </tbody>\n</table>\n<p>5 rows × 30 columns</p>\n</div>"},"metadata":{}}]},{"cell_type":"code","source":"X = cancer.data\ny = cancer.target\nX_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, \n random_state=42)","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:21:17.243233Z","iopub.execute_input":"2022-12-01T13:21:17.243595Z","iopub.status.idle":"2022-12-01T13:21:17.251729Z","shell.execute_reply.started":"2022-12-01T13:21:17.243563Z","shell.execute_reply":"2022-12-01T13:21:17.250403Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"from sklearn.tree 
import DecisionTreeClassifier\ntree = DecisionTreeClassifier(random_state=0)\ntree.fit(X_train, y_train)","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:21:18.976344Z","iopub.execute_input":"2022-12-01T13:21:18.976921Z","iopub.status.idle":"2022-12-01T13:21:19.137843Z","shell.execute_reply.started":"2022-12-01T13:21:18.976882Z","shell.execute_reply":"2022-12-01T13:21:19.135814Z"},"trusted":true},"execution_count":6,"outputs":[{"execution_count":6,"output_type":"execute_result","data":{"text/plain":"DecisionTreeClassifier(random_state=0)"},"metadata":{}}]},{"cell_type":"markdown","source":"## Evaluate and Inspect the Model","metadata":{}},{"cell_type":"code","source":"from sklearn.metrics import classification_report\ny_pred = tree.predict(X_test)\nprint(classification_report(y_test, y_pred))","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:21:19.705292Z","iopub.execute_input":"2022-12-01T13:21:19.705688Z","iopub.status.idle":"2022-12-01T13:21:19.718621Z","shell.execute_reply.started":"2022-12-01T13:21:19.705652Z","shell.execute_reply":"2022-12-01T13:21:19.717510Z"},"trusted":true},"execution_count":7,"outputs":[{"name":"stdout","text":" precision recall f1-score support\n\n 0 0.91 0.92 0.92 53\n 1 0.96 0.94 0.95 90\n\n accuracy 0.94 143\n macro avg 0.93 0.93 0.93 143\nweighted avg 0.94 0.94 0.94 143\n\n","output_type":"stream"}]},{"cell_type":"code","source":"report = pd.DataFrame.from_dict(classification_report(y_test, y_pred, output_dict = 
True))","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:21:35.456594Z","iopub.execute_input":"2022-12-01T13:21:35.457151Z","iopub.status.idle":"2022-12-01T13:21:35.469930Z","shell.execute_reply.started":"2022-12-01T13:21:35.457116Z","shell.execute_reply":"2022-12-01T13:21:35.468844Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"print(report)","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:21:36.065318Z","iopub.execute_input":"2022-12-01T13:21:36.065648Z","iopub.status.idle":"2022-12-01T13:21:36.073465Z","shell.execute_reply.started":"2022-12-01T13:21:36.065622Z","shell.execute_reply":"2022-12-01T13:21:36.072161Z"},"trusted":true},"execution_count":10,"outputs":[{"name":"stdout","text":" 0 1 accuracy macro avg weighted avg\nprecision 0.907407 0.955056 0.937063 0.931232 0.937396\nrecall 0.924528 0.944444 0.937063 0.934486 0.937063\nf1-score 0.915888 0.949721 0.937063 0.932804 0.937181\nsupport 53.000000 90.000000 0.937063 143.000000 143.000000\n","output_type":"stream"}]},{"cell_type":"code","source":"from sklearn.tree import export_graphviz\nexport_graphviz(tree, out_file=\"tree.dot\", class_names=[\"malignant\", \"benign\"],\n feature_names=cancer.feature_names, impurity=False, filled=True)","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:21:59.435038Z","iopub.execute_input":"2022-12-01T13:21:59.435400Z","iopub.status.idle":"2022-12-01T13:21:59.447564Z","shell.execute_reply.started":"2022-12-01T13:21:59.435368Z","shell.execute_reply":"2022-12-01T13:21:59.446135Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"code","source":"import pydot\nimport graphviz\n\n(graph,) = pydot.graph_from_dot_file('tree.dot')\n\nwith open(\"tree.dot\") as f:\n dot_graph = 
f.read()\ndisplay(graphviz.Source(dot_graph))","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:22:16.395803Z","iopub.execute_input":"2022-12-01T13:22:16.396179Z","iopub.status.idle":"2022-12-01T13:22:16.630158Z","shell.execute_reply.started":"2022-12-01T13:22:16.396144Z","shell.execute_reply":"2022-12-01T13:22:16.628958Z"},"_kg_hide-output":true,"trusted":true},"execution_count":12,"outputs":[{"output_type":"display_data","data":{"text/plain":"<graphviz.files.Source at 0x7ff835f585d0>","image/svg+xml":"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 2.42.3 (20191010.1750)\n -->\n<!-- Title: Tree Pages: 1 -->\n<svg width=\"1470pt\" height=\"789pt\"\n viewBox=\"0.00 0.00 1470.00 789.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 785)\">\n<title>Tree</title>\n<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-785 1466,-785 1466,4 -4,4\"/>\n<!-- 0 -->\n<g id=\"node1\" class=\"node\">\n<title>0</title>\n<polygon fill=\"#afd7f4\" stroke=\"black\" points=\"1057,-781 912,-781 912,-713 1057,-713 1057,-781\"/>\n<text text-anchor=\"middle\" x=\"984.5\" y=\"-765.8\" font-family=\"Times,serif\" font-size=\"14.00\">worst radius <= 16.795</text>\n<text text-anchor=\"middle\" x=\"984.5\" y=\"-750.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 426</text>\n<text text-anchor=\"middle\" x=\"984.5\" y=\"-735.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [159, 267]</text>\n<text text-anchor=\"middle\" x=\"984.5\" y=\"-720.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 1 -->\n<g id=\"node2\" class=\"node\">\n<title>1</title>\n<polygon fill=\"#4ca6e8\" stroke=\"black\" points=\"916.5,-677 730.5,-677 730.5,-609 916.5,-609 
916.5,-677\"/>\n<text text-anchor=\"middle\" x=\"823.5\" y=\"-661.8\" font-family=\"Times,serif\" font-size=\"14.00\">worst concave points <= 0.136</text>\n<text text-anchor=\"middle\" x=\"823.5\" y=\"-646.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 284</text>\n<text text-anchor=\"middle\" x=\"823.5\" y=\"-631.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [25, 259]</text>\n<text text-anchor=\"middle\" x=\"823.5\" y=\"-616.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 0->1 -->\n<g id=\"edge1\" class=\"edge\">\n<title>0->1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M932.23,-712.88C916.97,-703.21 900.15,-692.56 884.41,-682.59\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"886.11,-679.52 875.79,-677.12 882.36,-685.43 886.11,-679.52\"/>\n<text text-anchor=\"middle\" x=\"881.26\" y=\"-697.82\" font-family=\"Times,serif\" font-size=\"14.00\">True</text>\n</g>\n<!-- 28 -->\n<g id=\"node29\" class=\"node\">\n<title>28</title>\n<polygon fill=\"#e78945\" stroke=\"black\" points=\"1110,-677 971,-677 971,-609 1110,-609 1110,-677\"/>\n<text text-anchor=\"middle\" x=\"1040.5\" y=\"-661.8\" font-family=\"Times,serif\" font-size=\"14.00\">texture error <= 0.473</text>\n<text text-anchor=\"middle\" x=\"1040.5\" y=\"-646.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 142</text>\n<text text-anchor=\"middle\" x=\"1040.5\" y=\"-631.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [134, 8]</text>\n<text text-anchor=\"middle\" x=\"1040.5\" y=\"-616.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 0->28 -->\n<g id=\"edge28\" class=\"edge\">\n<title>0->28</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1002.68,-712.88C1007.38,-704.33 1012.49,-695.01 1017.4,-686.07\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1020.47,-687.75 1022.22,-677.3 1014.34,-684.38 1020.47,-687.75\"/>\n<text text-anchor=\"middle\" x=\"1029.21\" 
y=\"-697.6\" font-family=\"Times,serif\" font-size=\"14.00\">False</text>\n</g>\n<!-- 2 -->\n<g id=\"node3\" class=\"node\">\n<title>2</title>\n<polygon fill=\"#3c9fe5\" stroke=\"black\" points=\"654.5,-573 520.5,-573 520.5,-505 654.5,-505 654.5,-573\"/>\n<text text-anchor=\"middle\" x=\"587.5\" y=\"-557.8\" font-family=\"Times,serif\" font-size=\"14.00\">radius error <= 1.048</text>\n<text text-anchor=\"middle\" x=\"587.5\" y=\"-542.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 252</text>\n<text text-anchor=\"middle\" x=\"587.5\" y=\"-527.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [4, 248]</text>\n<text text-anchor=\"middle\" x=\"587.5\" y=\"-512.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 1->2 -->\n<g id=\"edge2\" class=\"edge\">\n<title>1->2</title>\n<path fill=\"none\" stroke=\"black\" d=\"M746.88,-608.88C720.27,-597.38 690.45,-584.49 663.86,-573\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"665.17,-569.76 654.6,-569 662.39,-576.18 665.17,-569.76\"/>\n</g>\n<!-- 17 -->\n<g id=\"node18\" class=\"node\">\n<title>17</title>\n<polygon fill=\"#f3c3a1\" stroke=\"black\" points=\"895,-573 752,-573 752,-505 895,-505 895,-573\"/>\n<text text-anchor=\"middle\" x=\"823.5\" y=\"-557.8\" font-family=\"Times,serif\" font-size=\"14.00\">worst texture <= 25.62</text>\n<text text-anchor=\"middle\" x=\"823.5\" y=\"-542.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 32</text>\n<text text-anchor=\"middle\" x=\"823.5\" y=\"-527.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [21, 11]</text>\n<text text-anchor=\"middle\" x=\"823.5\" y=\"-512.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 1->17 -->\n<g id=\"edge17\" class=\"edge\">\n<title>1->17</title>\n<path fill=\"none\" stroke=\"black\" d=\"M823.5,-608.88C823.5,-600.78 823.5,-591.98 823.5,-583.47\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"827,-583.3 823.5,-573.3 
820,-583.3 827,-583.3\"/>\n</g>\n<!-- 3 -->\n<g id=\"node4\" class=\"node\">\n<title>3</title>\n<polygon fill=\"#3b9ee5\" stroke=\"black\" points=\"438,-469 273,-469 273,-401 438,-401 438,-469\"/>\n<text text-anchor=\"middle\" x=\"355.5\" y=\"-453.8\" font-family=\"Times,serif\" font-size=\"14.00\">smoothness error <= 0.003</text>\n<text text-anchor=\"middle\" x=\"355.5\" y=\"-438.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 251</text>\n<text text-anchor=\"middle\" x=\"355.5\" y=\"-423.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [3, 248]</text>\n<text text-anchor=\"middle\" x=\"355.5\" y=\"-408.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 2->3 -->\n<g id=\"edge3\" class=\"edge\">\n<title>2->3</title>\n<path fill=\"none\" stroke=\"black\" d=\"M520.4,-508.5C495.26,-497.44 466.37,-484.74 439.86,-473.09\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"441.16,-469.84 430.6,-469.02 438.34,-476.25 441.16,-469.84\"/>\n</g>\n<!-- 16 -->\n<g id=\"node17\" class=\"node\">\n<title>16</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"644,-461.5 531,-461.5 531,-408.5 644,-408.5 644,-461.5\"/>\n<text text-anchor=\"middle\" x=\"587.5\" y=\"-446.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 1</text>\n<text text-anchor=\"middle\" x=\"587.5\" y=\"-431.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 0]</text>\n<text text-anchor=\"middle\" x=\"587.5\" y=\"-416.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 2->16 -->\n<g id=\"edge16\" class=\"edge\">\n<title>2->16</title>\n<path fill=\"none\" stroke=\"black\" d=\"M587.5,-504.88C587.5,-494.33 587.5,-482.6 587.5,-471.85\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"591,-471.52 587.5,-461.52 584,-471.52 591,-471.52\"/>\n</g>\n<!-- 4 -->\n<g id=\"node5\" class=\"node\">\n<title>4</title>\n<polygon fill=\"#7bbeee\" stroke=\"black\" points=\"237.5,-365 101.5,-365 
101.5,-297 237.5,-297 237.5,-365\"/>\n<text text-anchor=\"middle\" x=\"169.5\" y=\"-349.8\" font-family=\"Times,serif\" font-size=\"14.00\">mean texture <= 19.9</text>\n<text text-anchor=\"middle\" x=\"169.5\" y=\"-334.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 4</text>\n<text text-anchor=\"middle\" x=\"169.5\" y=\"-319.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 3]</text>\n<text text-anchor=\"middle\" x=\"169.5\" y=\"-304.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 3->4 -->\n<g id=\"edge4\" class=\"edge\">\n<title>3->4</title>\n<path fill=\"none\" stroke=\"black\" d=\"M295.11,-400.88C277.15,-391.04 257.34,-380.17 238.86,-370.03\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"240.35,-366.86 229.9,-365.12 236.99,-373 240.35,-366.86\"/>\n</g>\n<!-- 7 -->\n<g id=\"node8\" class=\"node\">\n<title>7</title>\n<polygon fill=\"#3b9ee5\" stroke=\"black\" points=\"414,-365 297,-365 297,-297 414,-297 414,-365\"/>\n<text text-anchor=\"middle\" x=\"355.5\" y=\"-349.8\" font-family=\"Times,serif\" font-size=\"14.00\">area error <= 48.7</text>\n<text text-anchor=\"middle\" x=\"355.5\" y=\"-334.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 247</text>\n<text text-anchor=\"middle\" x=\"355.5\" y=\"-319.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [2, 245]</text>\n<text text-anchor=\"middle\" x=\"355.5\" y=\"-304.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 3->7 -->\n<g id=\"edge7\" class=\"edge\">\n<title>3->7</title>\n<path fill=\"none\" stroke=\"black\" d=\"M355.5,-400.88C355.5,-392.78 355.5,-383.98 355.5,-375.47\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"359,-375.3 355.5,-365.3 352,-375.3 359,-375.3\"/>\n</g>\n<!-- 5 -->\n<g id=\"node6\" class=\"node\">\n<title>5</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"95,-253.5 0,-253.5 0,-200.5 95,-200.5 95,-253.5\"/>\n<text text-anchor=\"middle\" 
x=\"47.5\" y=\"-238.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 3</text>\n<text text-anchor=\"middle\" x=\"47.5\" y=\"-223.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [0, 3]</text>\n<text text-anchor=\"middle\" x=\"47.5\" y=\"-208.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 4->5 -->\n<g id=\"edge5\" class=\"edge\">\n<title>4->5</title>\n<path fill=\"none\" stroke=\"black\" d=\"M129.89,-296.88C115.81,-285.12 100,-271.89 86.02,-260.2\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"87.94,-257.25 78.02,-253.52 83.45,-262.62 87.94,-257.25\"/>\n</g>\n<!-- 6 -->\n<g id=\"node7\" class=\"node\">\n<title>6</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"226,-253.5 113,-253.5 113,-200.5 226,-200.5 226,-253.5\"/>\n<text text-anchor=\"middle\" x=\"169.5\" y=\"-238.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 1</text>\n<text text-anchor=\"middle\" x=\"169.5\" y=\"-223.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 0]</text>\n<text text-anchor=\"middle\" x=\"169.5\" y=\"-208.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 4->6 -->\n<g id=\"edge6\" class=\"edge\">\n<title>4->6</title>\n<path fill=\"none\" stroke=\"black\" d=\"M169.5,-296.88C169.5,-286.33 169.5,-274.6 169.5,-263.85\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"173,-263.52 169.5,-253.52 166,-263.52 173,-263.52\"/>\n</g>\n<!-- 8 -->\n<g id=\"node9\" class=\"node\">\n<title>8</title>\n<polygon fill=\"#3a9de5\" stroke=\"black\" points=\"387,-261 244,-261 244,-193 387,-193 387,-261\"/>\n<text text-anchor=\"middle\" x=\"315.5\" y=\"-245.8\" font-family=\"Times,serif\" font-size=\"14.00\">worst texture <= 33.35</text>\n<text text-anchor=\"middle\" x=\"315.5\" y=\"-230.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 243</text>\n<text text-anchor=\"middle\" x=\"315.5\" y=\"-215.8\" font-family=\"Times,serif\" 
font-size=\"14.00\">value = [1, 242]</text>\n<text text-anchor=\"middle\" x=\"315.5\" y=\"-200.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 7->8 -->\n<g id=\"edge8\" class=\"edge\">\n<title>7->8</title>\n<path fill=\"none\" stroke=\"black\" d=\"M342.51,-296.88C339.23,-288.51 335.66,-279.4 332.22,-270.63\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"335.47,-269.33 328.56,-261.3 328.95,-271.89 335.47,-269.33\"/>\n</g>\n<!-- 13 -->\n<g id=\"node14\" class=\"node\">\n<title>13</title>\n<polygon fill=\"#7bbeee\" stroke=\"black\" points=\"563.5,-261 405.5,-261 405.5,-193 563.5,-193 563.5,-261\"/>\n<text text-anchor=\"middle\" x=\"484.5\" y=\"-245.8\" font-family=\"Times,serif\" font-size=\"14.00\">mean concavity <= 0.029</text>\n<text text-anchor=\"middle\" x=\"484.5\" y=\"-230.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 4</text>\n<text text-anchor=\"middle\" x=\"484.5\" y=\"-215.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 3]</text>\n<text text-anchor=\"middle\" x=\"484.5\" y=\"-200.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 7->13 -->\n<g id=\"edge13\" class=\"edge\">\n<title>7->13</title>\n<path fill=\"none\" stroke=\"black\" d=\"M397.38,-296.88C409.21,-287.53 422.2,-277.26 434.46,-267.57\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"436.71,-270.25 442.39,-261.3 432.37,-264.76 436.71,-270.25\"/>\n</g>\n<!-- 9 -->\n<g id=\"node10\" class=\"node\">\n<title>9</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"236,-149.5 131,-149.5 131,-96.5 236,-96.5 236,-149.5\"/>\n<text text-anchor=\"middle\" x=\"183.5\" y=\"-134.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 225</text>\n<text text-anchor=\"middle\" x=\"183.5\" y=\"-119.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [0, 225]</text>\n<text text-anchor=\"middle\" x=\"183.5\" y=\"-104.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = 
benign</text>\n</g>\n<!-- 8->9 -->\n<g id=\"edge9\" class=\"edge\">\n<title>8->9</title>\n<path fill=\"none\" stroke=\"black\" d=\"M272.64,-192.88C257.27,-181.01 239.98,-167.65 224.75,-155.88\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"226.58,-152.86 216.53,-149.52 222.3,-158.4 226.58,-152.86\"/>\n</g>\n<!-- 10 -->\n<g id=\"node11\" class=\"node\">\n<title>10</title>\n<polygon fill=\"#45a3e7\" stroke=\"black\" points=\"390.5,-157 254.5,-157 254.5,-89 390.5,-89 390.5,-157\"/>\n<text text-anchor=\"middle\" x=\"322.5\" y=\"-141.8\" font-family=\"Times,serif\" font-size=\"14.00\">worst texture <= 33.8</text>\n<text text-anchor=\"middle\" x=\"322.5\" y=\"-126.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 18</text>\n<text text-anchor=\"middle\" x=\"322.5\" y=\"-111.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 17]</text>\n<text text-anchor=\"middle\" x=\"322.5\" y=\"-96.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 8->10 -->\n<g id=\"edge10\" class=\"edge\">\n<title>8->10</title>\n<path fill=\"none\" stroke=\"black\" d=\"M317.77,-192.88C318.33,-184.78 318.93,-175.98 319.52,-167.47\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"323.02,-167.52 320.21,-157.3 316.04,-167.04 323.02,-167.52\"/>\n</g>\n<!-- 11 -->\n<g id=\"node12\" class=\"node\">\n<title>11</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"317,-53 204,-53 204,0 317,0 317,-53\"/>\n<text text-anchor=\"middle\" x=\"260.5\" y=\"-37.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 1</text>\n<text text-anchor=\"middle\" x=\"260.5\" y=\"-22.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 0]</text>\n<text text-anchor=\"middle\" x=\"260.5\" y=\"-7.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 10->11 -->\n<g id=\"edge11\" class=\"edge\">\n<title>10->11</title>\n<path fill=\"none\" stroke=\"black\" d=\"M300.82,-88.95C295.05,-80.17 288.82,-70.66 
283.01,-61.82\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"285.8,-59.68 277.39,-53.24 279.94,-63.52 285.8,-59.68\"/>\n</g>\n<!-- 12 -->\n<g id=\"node13\" class=\"node\">\n<title>12</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"433.5,-53 335.5,-53 335.5,0 433.5,0 433.5,-53\"/>\n<text text-anchor=\"middle\" x=\"384.5\" y=\"-37.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 17</text>\n<text text-anchor=\"middle\" x=\"384.5\" y=\"-22.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [0, 17]</text>\n<text text-anchor=\"middle\" x=\"384.5\" y=\"-7.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 10->12 -->\n<g id=\"edge12\" class=\"edge\">\n<title>10->12</title>\n<path fill=\"none\" stroke=\"black\" d=\"M344.18,-88.95C349.95,-80.17 356.18,-70.66 361.99,-61.82\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"365.06,-63.52 367.61,-53.24 359.2,-59.68 365.06,-63.52\"/>\n</g>\n<!-- 14 -->\n<g id=\"node15\" class=\"node\">\n<title>14</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"535,-149.5 422,-149.5 422,-96.5 535,-96.5 535,-149.5\"/>\n<text text-anchor=\"middle\" x=\"478.5\" y=\"-134.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 1</text>\n<text text-anchor=\"middle\" x=\"478.5\" y=\"-119.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 0]</text>\n<text text-anchor=\"middle\" x=\"478.5\" y=\"-104.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 13->14 -->\n<g id=\"edge14\" class=\"edge\">\n<title>13->14</title>\n<path fill=\"none\" stroke=\"black\" d=\"M482.55,-192.88C481.92,-182.22 481.23,-170.35 480.59,-159.52\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"484.08,-159.3 480,-149.52 477.09,-159.71 484.08,-159.3\"/>\n</g>\n<!-- 15 -->\n<g id=\"node16\" class=\"node\">\n<title>15</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"648,-149.5 553,-149.5 553,-96.5 648,-96.5 
648,-149.5\"/>\n<text text-anchor=\"middle\" x=\"600.5\" y=\"-134.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 3</text>\n<text text-anchor=\"middle\" x=\"600.5\" y=\"-119.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [0, 3]</text>\n<text text-anchor=\"middle\" x=\"600.5\" y=\"-104.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 13->15 -->\n<g id=\"edge15\" class=\"edge\">\n<title>13->15</title>\n<path fill=\"none\" stroke=\"black\" d=\"M522.16,-192.88C535.54,-181.12 550.58,-167.89 563.88,-156.2\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"566.28,-158.75 571.48,-149.52 561.66,-153.49 566.28,-158.75\"/>\n</g>\n<!-- 18 -->\n<g id=\"node19\" class=\"node\">\n<title>18</title>\n<polygon fill=\"#7bbeee\" stroke=\"black\" points=\"852.5,-469 724.5,-469 724.5,-401 852.5,-401 852.5,-469\"/>\n<text text-anchor=\"middle\" x=\"788.5\" y=\"-453.8\" font-family=\"Times,serif\" font-size=\"14.00\">worst area <= 817.1</text>\n<text text-anchor=\"middle\" x=\"788.5\" y=\"-438.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 12</text>\n<text text-anchor=\"middle\" x=\"788.5\" y=\"-423.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [3, 9]</text>\n<text text-anchor=\"middle\" x=\"788.5\" y=\"-408.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 17->18 -->\n<g id=\"edge18\" class=\"edge\">\n<title>17->18</title>\n<path fill=\"none\" stroke=\"black\" d=\"M812.14,-504.88C809.29,-496.6 806.2,-487.6 803.22,-478.91\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"806.48,-477.62 799.93,-469.3 799.86,-479.89 806.48,-477.62\"/>\n</g>\n<!-- 23 -->\n<g id=\"node24\" class=\"node\">\n<title>23</title>\n<polygon fill=\"#e88f4f\" stroke=\"black\" points=\"1042,-469 881,-469 881,-401 1042,-401 1042,-469\"/>\n<text text-anchor=\"middle\" x=\"961.5\" y=\"-453.8\" font-family=\"Times,serif\" font-size=\"14.00\">worst symmetry <= 0.268</text>\n<text 
text-anchor=\"middle\" x=\"961.5\" y=\"-438.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 20</text>\n<text text-anchor=\"middle\" x=\"961.5\" y=\"-423.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [18, 2]</text>\n<text text-anchor=\"middle\" x=\"961.5\" y=\"-408.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 17->23 -->\n<g id=\"edge23\" class=\"edge\">\n<title>17->23</title>\n<path fill=\"none\" stroke=\"black\" d=\"M868.3,-504.88C881.08,-495.44 895.12,-485.06 908.34,-475.29\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"910.49,-478.06 916.45,-469.3 906.33,-472.43 910.49,-478.06\"/>\n</g>\n<!-- 19 -->\n<g id=\"node20\" class=\"node\">\n<title>19</title>\n<polygon fill=\"#4fa8e8\" stroke=\"black\" points=\"714,-365 545,-365 545,-297 714,-297 714,-365\"/>\n<text text-anchor=\"middle\" x=\"629.5\" y=\"-349.8\" font-family=\"Times,serif\" font-size=\"14.00\">mean smoothness <= 0.123</text>\n<text text-anchor=\"middle\" x=\"629.5\" y=\"-334.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 10</text>\n<text text-anchor=\"middle\" x=\"629.5\" y=\"-319.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 9]</text>\n<text text-anchor=\"middle\" x=\"629.5\" y=\"-304.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 18->19 -->\n<g id=\"edge19\" class=\"edge\">\n<title>18->19</title>\n<path fill=\"none\" stroke=\"black\" d=\"M736.88,-400.88C721.8,-391.21 705.2,-380.56 689.65,-370.59\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"691.44,-367.58 681.14,-365.12 687.66,-373.47 691.44,-367.58\"/>\n</g>\n<!-- 22 -->\n<g id=\"node23\" class=\"node\">\n<title>22</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"845,-357.5 732,-357.5 732,-304.5 845,-304.5 845,-357.5\"/>\n<text text-anchor=\"middle\" x=\"788.5\" y=\"-342.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 2</text>\n<text text-anchor=\"middle\" x=\"788.5\" 
y=\"-327.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [2, 0]</text>\n<text text-anchor=\"middle\" x=\"788.5\" y=\"-312.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 18->22 -->\n<g id=\"edge22\" class=\"edge\">\n<title>18->22</title>\n<path fill=\"none\" stroke=\"black\" d=\"M788.5,-400.88C788.5,-390.33 788.5,-378.6 788.5,-367.85\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"792,-367.52 788.5,-357.52 785,-367.52 792,-367.52\"/>\n</g>\n<!-- 20 -->\n<g id=\"node21\" class=\"node\">\n<title>20</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"677,-253.5 582,-253.5 582,-200.5 677,-200.5 677,-253.5\"/>\n<text text-anchor=\"middle\" x=\"629.5\" y=\"-238.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 9</text>\n<text text-anchor=\"middle\" x=\"629.5\" y=\"-223.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [0, 9]</text>\n<text text-anchor=\"middle\" x=\"629.5\" y=\"-208.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 19->20 -->\n<g id=\"edge20\" class=\"edge\">\n<title>19->20</title>\n<path fill=\"none\" stroke=\"black\" d=\"M629.5,-296.88C629.5,-286.33 629.5,-274.6 629.5,-263.85\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"633,-263.52 629.5,-253.52 626,-263.52 633,-263.52\"/>\n</g>\n<!-- 21 -->\n<g id=\"node22\" class=\"node\">\n<title>21</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"808,-253.5 695,-253.5 695,-200.5 808,-200.5 808,-253.5\"/>\n<text text-anchor=\"middle\" x=\"751.5\" y=\"-238.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 1</text>\n<text text-anchor=\"middle\" x=\"751.5\" y=\"-223.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 0]</text>\n<text text-anchor=\"middle\" x=\"751.5\" y=\"-208.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 19->21 -->\n<g id=\"edge21\" class=\"edge\">\n<title>19->21</title>\n<path 
fill=\"none\" stroke=\"black\" d=\"M669.11,-296.88C683.19,-285.12 699,-271.89 712.98,-260.2\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"715.55,-262.62 720.98,-253.52 711.06,-257.25 715.55,-262.62\"/>\n</g>\n<!-- 24 -->\n<g id=\"node25\" class=\"node\">\n<title>24</title>\n<polygon fill=\"#9ccef2\" stroke=\"black\" points=\"1060,-365 863,-365 863,-297 1060,-297 1060,-365\"/>\n<text text-anchor=\"middle\" x=\"961.5\" y=\"-349.8\" font-family=\"Times,serif\" font-size=\"14.00\">fractal dimension error <= 0.002</text>\n<text text-anchor=\"middle\" x=\"961.5\" y=\"-334.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 3</text>\n<text text-anchor=\"middle\" x=\"961.5\" y=\"-319.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 2]</text>\n<text text-anchor=\"middle\" x=\"961.5\" y=\"-304.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 23->24 -->\n<g id=\"edge24\" class=\"edge\">\n<title>23->24</title>\n<path fill=\"none\" stroke=\"black\" d=\"M961.5,-400.88C961.5,-392.78 961.5,-383.98 961.5,-375.47\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"965,-375.3 961.5,-365.3 958,-375.3 965,-375.3\"/>\n</g>\n<!-- 27 -->\n<g id=\"node28\" class=\"node\">\n<title>27</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"1191,-357.5 1078,-357.5 1078,-304.5 1191,-304.5 1191,-357.5\"/>\n<text text-anchor=\"middle\" x=\"1134.5\" y=\"-342.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 17</text>\n<text text-anchor=\"middle\" x=\"1134.5\" y=\"-327.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [17, 0]</text>\n<text text-anchor=\"middle\" x=\"1134.5\" y=\"-312.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 23->27 -->\n<g id=\"edge27\" class=\"edge\">\n<title>23->27</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1017.67,-400.88C1038.47,-388.62 1061.94,-374.78 1082.37,-362.74\"/>\n<polygon fill=\"black\" stroke=\"black\" 
points=\"1084.38,-365.61 1091.22,-357.52 1080.82,-359.58 1084.38,-365.61\"/>\n</g>\n<!-- 25 -->\n<g id=\"node26\" class=\"node\">\n<title>25</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"979,-253.5 866,-253.5 866,-200.5 979,-200.5 979,-253.5\"/>\n<text text-anchor=\"middle\" x=\"922.5\" y=\"-238.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 1</text>\n<text text-anchor=\"middle\" x=\"922.5\" y=\"-223.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [1, 0]</text>\n<text text-anchor=\"middle\" x=\"922.5\" y=\"-208.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 24->25 -->\n<g id=\"edge25\" class=\"edge\">\n<title>24->25</title>\n<path fill=\"none\" stroke=\"black\" d=\"M948.84,-296.88C944.72,-286.11 940.13,-274.11 935.95,-263.18\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"939.1,-261.61 932.26,-253.52 932.56,-264.11 939.1,-261.61\"/>\n</g>\n<!-- 26 -->\n<g id=\"node27\" class=\"node\">\n<title>26</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"1092,-253.5 997,-253.5 997,-200.5 1092,-200.5 1092,-253.5\"/>\n<text text-anchor=\"middle\" x=\"1044.5\" y=\"-238.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 2</text>\n<text text-anchor=\"middle\" x=\"1044.5\" y=\"-223.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [0, 2]</text>\n<text text-anchor=\"middle\" x=\"1044.5\" y=\"-208.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 24->26 -->\n<g id=\"edge26\" class=\"edge\">\n<title>24->26</title>\n<path fill=\"none\" stroke=\"black\" d=\"M988.45,-296.88C997.67,-285.56 1007.98,-272.88 1017.22,-261.52\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1020.14,-263.49 1023.73,-253.52 1014.71,-259.07 1020.14,-263.49\"/>\n</g>\n<!-- 29 -->\n<g id=\"node30\" class=\"node\">\n<title>29</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"1088,-565.5 993,-565.5 993,-512.5 1088,-512.5 1088,-565.5\"/>\n<text 
text-anchor=\"middle\" x=\"1040.5\" y=\"-550.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 5</text>\n<text text-anchor=\"middle\" x=\"1040.5\" y=\"-535.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [0, 5]</text>\n<text text-anchor=\"middle\" x=\"1040.5\" y=\"-520.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 28->29 -->\n<g id=\"edge29\" class=\"edge\">\n<title>28->29</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1040.5,-608.88C1040.5,-598.33 1040.5,-586.6 1040.5,-575.85\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1044,-575.52 1040.5,-565.52 1037,-575.52 1044,-575.52\"/>\n</g>\n<!-- 30 -->\n<g id=\"node31\" class=\"node\">\n<title>30</title>\n<polygon fill=\"#e6843d\" stroke=\"black\" points=\"1335.5,-573 1177.5,-573 1177.5,-505 1335.5,-505 1335.5,-573\"/>\n<text text-anchor=\"middle\" x=\"1256.5\" y=\"-557.8\" font-family=\"Times,serif\" font-size=\"14.00\">worst concavity <= 0.191</text>\n<text text-anchor=\"middle\" x=\"1256.5\" y=\"-542.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 137</text>\n<text text-anchor=\"middle\" x=\"1256.5\" y=\"-527.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [134, 3]</text>\n<text text-anchor=\"middle\" x=\"1256.5\" y=\"-512.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 28->30 -->\n<g id=\"edge30\" class=\"edge\">\n<title>28->30</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1110.03,-609.17C1131.4,-599.08 1155.08,-587.89 1177.05,-577.52\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1178.81,-580.56 1186.36,-573.12 1175.82,-574.23 1178.81,-580.56\"/>\n</g>\n<!-- 31 -->\n<g id=\"node32\" class=\"node\">\n<title>31</title>\n<polygon fill=\"#bddef6\" stroke=\"black\" points=\"1331,-469 1182,-469 1182,-401 1331,-401 1331,-469\"/>\n<text text-anchor=\"middle\" x=\"1256.5\" y=\"-453.8\" font-family=\"Times,serif\" font-size=\"14.00\">worst texture <= 30.975</text>\n<text 
text-anchor=\"middle\" x=\"1256.5\" y=\"-438.8\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 5</text>\n<text text-anchor=\"middle\" x=\"1256.5\" y=\"-423.8\" font-family=\"Times,serif\" font-size=\"14.00\">value = [2, 3]</text>\n<text text-anchor=\"middle\" x=\"1256.5\" y=\"-408.8\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 30->31 -->\n<g id=\"edge31\" class=\"edge\">\n<title>30->31</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1256.5,-504.88C1256.5,-496.78 1256.5,-487.98 1256.5,-479.47\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1260,-479.3 1256.5,-469.3 1253,-479.3 1260,-479.3\"/>\n</g>\n<!-- 34 -->\n<g id=\"node35\" class=\"node\">\n<title>34</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"1462,-461.5 1349,-461.5 1349,-408.5 1462,-408.5 1462,-461.5\"/>\n<text text-anchor=\"middle\" x=\"1405.5\" y=\"-446.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 132</text>\n<text text-anchor=\"middle\" x=\"1405.5\" y=\"-431.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [132, 0]</text>\n<text text-anchor=\"middle\" x=\"1405.5\" y=\"-416.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 30->34 -->\n<g id=\"edge34\" class=\"edge\">\n<title>30->34</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1304.88,-504.88C1322.55,-492.79 1342.47,-479.15 1359.89,-467.22\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1361.95,-470.06 1368.22,-461.52 1357.99,-464.28 1361.95,-470.06\"/>\n</g>\n<!-- 32 -->\n<g id=\"node33\" class=\"node\">\n<title>32</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"1304,-357.5 1209,-357.5 1209,-304.5 1304,-304.5 1304,-357.5\"/>\n<text text-anchor=\"middle\" x=\"1256.5\" y=\"-342.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 3</text>\n<text text-anchor=\"middle\" x=\"1256.5\" y=\"-327.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [0, 3]</text>\n<text 
text-anchor=\"middle\" x=\"1256.5\" y=\"-312.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = benign</text>\n</g>\n<!-- 31->32 -->\n<g id=\"edge32\" class=\"edge\">\n<title>31->32</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1256.5,-400.88C1256.5,-390.33 1256.5,-378.6 1256.5,-367.85\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1260,-367.52 1256.5,-357.52 1253,-367.52 1260,-367.52\"/>\n</g>\n<!-- 33 -->\n<g id=\"node34\" class=\"node\">\n<title>33</title>\n<polygon fill=\"#e58139\" stroke=\"black\" points=\"1435,-357.5 1322,-357.5 1322,-304.5 1435,-304.5 1435,-357.5\"/>\n<text text-anchor=\"middle\" x=\"1378.5\" y=\"-342.3\" font-family=\"Times,serif\" font-size=\"14.00\">samples = 2</text>\n<text text-anchor=\"middle\" x=\"1378.5\" y=\"-327.3\" font-family=\"Times,serif\" font-size=\"14.00\">value = [2, 0]</text>\n<text text-anchor=\"middle\" x=\"1378.5\" y=\"-312.3\" font-family=\"Times,serif\" font-size=\"14.00\">class = malignant</text>\n</g>\n<!-- 31->33 -->\n<g id=\"edge33\" class=\"edge\">\n<title>31->33</title>\n<path fill=\"none\" stroke=\"black\" d=\"M1296.11,-400.88C1310.19,-389.12 1326,-375.89 1339.98,-364.2\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"1342.55,-366.62 1347.98,-357.52 1338.06,-361.25 1342.55,-366.62\"/>\n</g>\n</g>\n</svg>\n"},"metadata":{}}]},{"cell_type":"code","source":"print(\"Feature importances:\")\nprint(tree.feature_importances_)","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:22:27.114215Z","iopub.execute_input":"2022-12-01T13:22:27.114789Z","iopub.status.idle":"2022-12-01T13:22:27.123148Z","shell.execute_reply.started":"2022-12-01T13:22:27.114749Z","shell.execute_reply":"2022-12-01T13:22:27.121501Z"},"trusted":true},"execution_count":13,"outputs":[{"name":"stdout","text":"Feature importances:\n[0. 0.00752597 0. 0. 0.00903116 0.\n 0.00752597 0. 0. 0. 0.00975731 0.04630969\n 0. 0.00238745 0.00231135 0. 0. 0.\n 0. 0.00668975 0.69546322 0.05383211 0. 0.01354675\n 0. 0. 
0.01740312 0.11684357 0.01137258 0. ]\n","output_type":"stream"}]},{"cell_type":"code","source":"import matplotlib.pyplot as plt\n#bar chart of feature importances\ndef plot_feature_importances_cancer(model):\n n_features = cancer.data.shape[1]\n plt.figure(figsize=(8,20))\n plt.barh(np.arange(n_features), model.feature_importances_, align='center')\n plt.yticks(np.arange(n_features), cancer.feature_names)\n plt.xlabel(\"Feature importance\")\n plt.ylabel(\"Feature\")\n plt.ylim(-1, n_features)\n plt.savefig('testfig.png',dpi=300, bbox_inches = \"tight\")\n\nplot_feature_importances_cancer(tree)","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:22:29.532843Z","iopub.execute_input":"2022-12-01T13:22:29.533165Z","iopub.status.idle":"2022-12-01T13:22:31.008940Z","shell.execute_reply.started":"2022-12-01T13:22:29.533139Z","shell.execute_reply":"2022-12-01T13:22:31.008108Z"},"trusted":true},"execution_count":14,"outputs":[{"output_type":"display_data","data":{"text/plain":"<Figure size 576x1440 with 1 Axes>","image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":"**We will now compute the cost complexity pruning (post-pruning) path of our tree. It gives the candidate `ccp_alpha` values that can be used to reduce the tree's size and overfitting.**","metadata":{}},{"cell_type":"code","source":"path = tree.cost_complexity_pruning_path(X_train, y_train)\nccp_alphas, impurities = path.ccp_alphas, 
path.impurities","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:22:36.829875Z","iopub.execute_input":"2022-12-01T13:22:36.830481Z","iopub.status.idle":"2022-12-01T13:22:36.846093Z","shell.execute_reply.started":"2022-12-01T13:22:36.830450Z","shell.execute_reply":"2022-12-01T13:22:36.843969Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"print(ccp_alphas)","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:22:39.720460Z","iopub.execute_input":"2022-12-01T13:22:39.720843Z","iopub.status.idle":"2022-12-01T13:22:39.733881Z","shell.execute_reply.started":"2022-12-01T13:22:39.720804Z","shell.execute_reply":"2022-12-01T13:22:39.730426Z"},"trusted":true},"execution_count":16,"outputs":[{"name":"stdout","text":"[0. 0.00231936 0.00312989 0.00422535 0.00456509 0.00532081\n 0.0056338 0.00633803 0.00814228 0.01487676 0.02166662 0.05466684\n 0.32538187]\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Model hosting using skops 🤗\nWe will now initialize a repository and save a model and a model card in it. 
","metadata":{}},{"cell_type":"code","source":"from skops import hub_utils, card\nimport os\nimport joblib\n\n# create a directory to initialize our repo\nlocal_repo = \"./model_dir\"\n# save the model\npkl_path = \"./model.pkl\"\njoblib.dump(tree, pkl_path)\n\n# initialize the repository \nhub_utils.init(model=pkl_path, \n task=\"tabular-classification\",\n requirements=[\"scikit-learn\"], \n dst=local_repo,\n data=X_train)\n\n# see what's inside the repository\nprint(os.listdir(local_repo))","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:23:19.683597Z","iopub.execute_input":"2022-12-01T13:23:19.684289Z","iopub.status.idle":"2022-12-01T13:23:19.694628Z","shell.execute_reply.started":"2022-12-01T13:23:19.684241Z","shell.execute_reply":"2022-12-01T13:23:19.693726Z"},"trusted":true},"execution_count":19,"outputs":[{"name":"stdout","text":"['config.json', 'model.pkl']\n","output_type":"stream"}]},{"cell_type":"markdown","source":"We will now initialize a model card and add information.","metadata":{}},{"cell_type":"code","source":"from pathlib import Path\nmodel_card = card.Card(tree, metadata=card.metadata_from_config(Path(local_repo)))","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:23:23.949248Z","iopub.execute_input":"2022-12-01T13:23:23.949851Z","iopub.status.idle":"2022-12-01T13:23:23.956738Z","shell.execute_reply.started":"2022-12-01T13:23:23.949805Z","shell.execute_reply":"2022-12-01T13:23:23.955196Z"},"trusted":true},"execution_count":20,"outputs":[]},{"cell_type":"code","source":"description = \"This is a Decision Tree Classifier trained on breast cancer dataset and pruned with CCP.\"\nlimitations = \"This model is trained for educational purposes.\"\nmodel_card.add(model_description = description,\n limitations = 
limitations)\n","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:23:25.733610Z","iopub.execute_input":"2022-12-01T13:23:25.733946Z","iopub.status.idle":"2022-12-01T13:23:25.742664Z","shell.execute_reply.started":"2022-12-01T13:23:25.733916Z","shell.execute_reply":"2022-12-01T13:23:25.741153Z"},"trusted":true},"execution_count":21,"outputs":[{"execution_count":21,"output_type":"execute_result","data":{"text/plain":"Card(\n model=DecisionTreeClassifier(random_state=0),\n metadata.library_name=sklearn,\n metadata.tags=['sklearn', 'skops', 'tabular-classification'],\n metadata.model_file=model.pkl,\n metadata.widget={...},\n model_description='This is a Decisi...cancer dataset and pruned with CCP.',\n limitations='This model is trained for educational purposes.',\n)"},"metadata":{}}]},{"cell_type":"markdown","source":"We will add the plots we've visualized above.","metadata":{}},{"cell_type":"code","source":"# save feature importance bar chart\nplot_feature_importances_cancer(tree)\nplt.savefig(Path(local_repo) / 'feature_importances.png')\n# save graph\ngraph.write_png(Path(local_repo) / 'tree.png')\n\n# write the plots to model card\nmodel_card.add_plot(**{\"Feature Importances\": 'feature_importances.png',\n \"Tree Splits\": 'tree.png'})","metadata":{"_kg_hide-output":true,"execution":{"iopub.status.busy":"2022-12-01T13:23:30.323398Z","iopub.execute_input":"2022-12-01T13:23:30.323754Z","iopub.status.idle":"2022-12-01T13:23:32.254691Z","shell.execute_reply.started":"2022-12-01T13:23:30.323721Z","shell.execute_reply":"2022-12-01T13:23:32.253135Z"},"trusted":true},"execution_count":22,"outputs":[{"execution_count":22,"output_type":"execute_result","data":{"text/plain":"Card(\n model=DecisionTreeClassifier(random_state=0),\n metadata.library_name=sklearn,\n metadata.tags=['sklearn', 'skops', 'tabular-classification'],\n metadata.model_file=model.pkl,\n metadata.widget={...},\n model_description='This is a Decisi...cancer dataset and pruned with CCP.',\n 
limitations='This model is trained for educational purposes.',\n Feature Importances='feature_importances.png',\n Tree Splits='tree.png',\n)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<Figure size 576x1440 with 1 Axes>","image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":"We can also add evaluation metrics to the model card and save a confusion matrix plot.","metadata":{}},{"cell_type":"code","source":"from sklearn.metrics import (\n ConfusionMatrixDisplay,\n accuracy_score,\n classification_report,\n confusion_matrix,\n f1_score,\n)\n# add metrics to our model card\naccuracy = accuracy_score(y_test, y_pred)\nf1 = f1_score(y_test, y_pred, average=\"micro\")\nmodel_card.add_metrics(**{\"accuracy\": accuracy, \"f1 score\": f1})","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:23:41.422503Z","iopub.execute_input":"2022-12-01T13:23:41.422904Z","iopub.status.idle":"2022-12-01T13:23:41.436927Z","shell.execute_reply.started":"2022-12-01T13:23:41.422867Z","shell.execute_reply":"2022-12-01T13:23:41.435224Z"},"trusted":true},"execution_count":23,"outputs":[{"execution_count":23,"output_type":"execute_result","data":{"text/plain":"Card(\n model=DecisionTreeClassifier(random_state=0),\n metadata.library_name=sklearn,\n metadata.tags=['sklearn', 'skops', 'tabular-classification'],\n metadata.model_file=model.pkl,\n metadata.widget={...},\n model_description='This is a Decisi...cancer dataset and pruned with CCP.',\n limitations='This model is trained for educational purposes.',\n Feature Importances='feature_importances.png',\n Tree Splits='tree.png',\n)"},"metadata":{}}]},{"cell_type":"code","source":"cm = confusion_matrix(y_test, y_pred, labels=tree.classes_)\ndisp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=tree.classes_)\ndisp.plot()\n# save the figure to repo\ndisp.figure_.savefig(Path(local_repo) / \"confusion_matrix.png\")\n# write the figure to model card\nmodel_card.add_plot(**{\"Confusion Matrix\": 
\"confusion_matrix.png\"})","metadata":{"_kg_hide-output":true,"execution":{"iopub.status.busy":"2022-12-01T13:23:44.711392Z","iopub.execute_input":"2022-12-01T13:23:44.711727Z","iopub.status.idle":"2022-12-01T13:23:44.947598Z","shell.execute_reply.started":"2022-12-01T13:23:44.711697Z","shell.execute_reply":"2022-12-01T13:23:44.945571Z"},"trusted":true},"execution_count":24,"outputs":[{"execution_count":24,"output_type":"execute_result","data":{"text/plain":"Card(\n model=DecisionTreeClassifier(random_state=0),\n metadata.library_name=sklearn,\n metadata.tags=['sklearn', 'skops', 'tabular-classification'],\n metadata.model_file=model.pkl,\n metadata.widget={...},\n model_description='This is a Decisi...cancer dataset and pruned with CCP.',\n limitations='This model is trained for educational purposes.',\n Feature Importances='feature_importances.png',\n Tree Splits='tree.png',\n Confusion Matrix='confusion_matrix.png',\n)"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<Figure size 432x288 with 2 Axes>","image/png":"\n"},"metadata":{"needs_background":"light"}}]},{"cell_type":"markdown","source":"We can now save the model card and push our repository to 🤗Hub!","metadata":{}},{"cell_type":"code","source":"model_card.save(Path(local_repo) / \"README.md\")","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:23:53.269801Z","iopub.execute_input":"2022-12-01T13:23:53.270182Z","iopub.status.idle":"2022-12-01T13:23:53.327379Z","shell.execute_reply.started":"2022-12-01T13:23:53.270150Z","shell.execute_reply":"2022-12-01T13:23:53.326180Z"},"trusted":true},"execution_count":25,"outputs":[]},{"cell_type":"markdown","source":"We will now push the model to 🤗Hub. For this, we firstly need to authenticate ourselves. 
Then, we can push our model!","metadata":{}},{"cell_type":"code","source":"from huggingface_hub import notebook_login\nnotebook_login()","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:23:54.521519Z","iopub.execute_input":"2022-12-01T13:23:54.521921Z","iopub.status.idle":"2022-12-01T13:23:54.578232Z","shell.execute_reply.started":"2022-12-01T13:23:54.521883Z","shell.execute_reply":"2022-12-01T13:23:54.576764Z"},"trusted":true},"execution_count":26,"outputs":[{"output_type":"display_data","data":{"text/plain":"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"6d1a4c50a6b44944b7a57710cdb86ab3"}},"metadata":{}}]},{"cell_type":"code","source":"hub_utils.push(repo_id = \"scikit-learn/cancer-prediction-trees\",\n source = local_repo,\n create_remote = True)","metadata":{"execution":{"iopub.status.busy":"2022-12-01T13:24:15.673795Z","iopub.execute_input":"2022-12-01T13:24:15.674244Z","iopub.status.idle":"2022-12-01T13:24:21.694500Z","shell.execute_reply.started":"2022-12-01T13:24:15.674199Z","shell.execute_reply":"2022-12-01T13:24:21.692858Z"},"trusted":true},"execution_count":27,"outputs":[]}]} |