{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.7.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"# This Python 3 environment comes with many helpful analytics libraries installed\n# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python\n# For example, here's several helpful packages to load\n\nimport numpy as np # linear algebra\nimport pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n\n# Input data files are available in the read-only \"../input/\" directory\n# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory\n\nimport os\nfor dirname, _, filenames in os.walk('/kaggle/input'):\n for filename in filenames:\n print(os.path.join(dirname, filename))\n\n# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using \"Save & Run All\" \n# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current 
session","metadata":{"_uuid":"8f2839f25d086af736a60e9eeb907d3b93b6e0e5","_cell_guid":"b1076dfc-b9ad-4769-8c92-a6c4dae69d19","execution":{"iopub.status.busy":"2022-08-12T16:07:49.750037Z","iopub.execute_input":"2022-08-12T16:07:49.750515Z","iopub.status.idle":"2022-08-12T16:07:49.761989Z","shell.execute_reply.started":"2022-08-12T16:07:49.750473Z","shell.execute_reply":"2022-08-12T16:07:49.760803Z"},"trusted":true},"execution_count":34,"outputs":[{"name":"stdout","text":"/kaggle/input/tabular-playground-series-aug-2022/sample_submission.csv\n/kaggle/input/tabular-playground-series-aug-2022/train.csv\n/kaggle/input/tabular-playground-series-aug-2022/test.csv\n","output_type":"stream"}]},{"cell_type":"markdown","source":"## Using skops to host your models on Hugging Face Hub\nThis notebook shows you how you can use [skops](https://skops.readthedocs.io/) to improve your data science workflows with scikit-learn. We will walk through an end-to-end example using the Kaggle Tabular Playground Series of August 2022.","metadata":{}},{"cell_type":"markdown","source":"## Install skops","metadata":{}},{"cell_type":"code","source":"#!pip install skops","metadata":{"execution":{"iopub.status.busy":"2022-08-12T16:42:20.000537Z","iopub.execute_input":"2022-08-12T16:42:20.000960Z","iopub.status.idle":"2022-08-12T16:42:20.005212Z","shell.execute_reply.started":"2022-08-12T16:42:20.000926Z","shell.execute_reply":"2022-08-12T16:42:20.004298Z"},"trusted":true},"execution_count":58,"outputs":[]},{"cell_type":"markdown","source":"## Import libraries","metadata":{}},{"cell_type":"code","source":"import skops\nimport sklearn\nimport matplotlib.pyplot as 
plt","metadata":{"execution":{"iopub.status.busy":"2022-08-12T16:08:01.273144Z","iopub.execute_input":"2022-08-12T16:08:01.273524Z","iopub.status.idle":"2022-08-12T16:08:01.279217Z","shell.execute_reply.started":"2022-08-12T16:08:01.273487Z","shell.execute_reply":"2022-08-12T16:08:01.277670Z"},"trusted":true},"execution_count":36,"outputs":[]},{"cell_type":"markdown","source":"## Let's take a look at the dataset\nThe target variable is a binary category. We have a couple of numerical and categorical variables.","metadata":{}},{"cell_type":"code","source":"df = pd.read_csv(\"../input/tabular-playground-series-aug-2022/train.csv\")\ndf.head()","metadata":{"execution":{"iopub.status.busy":"2022-08-12T16:08:01.280555Z","iopub.execute_input":"2022-08-12T16:08:01.280918Z","iopub.status.idle":"2022-08-12T16:08:01.433127Z","shell.execute_reply.started":"2022-08-12T16:08:01.280882Z","shell.execute_reply":"2022-08-12T16:08:01.431902Z"},"trusted":true},"execution_count":37,"outputs":[{"execution_count":37,"output_type":"execute_result","data":{"text/plain":" id product_code loading attribute_0 attribute_1 attribute_2 attribute_3 \\\n0 0 A 80.10 material_7 material_8 9 5 \n1 1 A 84.89 material_7 material_8 9 5 \n2 2 A 82.43 material_7 material_8 9 5 \n3 3 A 101.07 material_7 material_8 9 5 \n4 4 A 188.06 material_7 material_8 9 5 \n\n measurement_0 measurement_1 measurement_2 ... measurement_9 \\\n0 7 8 4 ... 10.672 \n1 14 3 3 ... 12.448 \n2 12 1 5 ... 12.715 \n3 13 2 6 ... 12.471 \n4 9 2 8 ... 
10.337 \n\n measurement_10 measurement_11 measurement_12 measurement_13 \\\n0 15.859 17.594 15.193 15.029 \n1 17.947 17.915 11.755 14.732 \n2 15.607 NaN 13.798 16.711 \n3 16.346 18.377 10.020 15.250 \n4 17.082 19.932 12.428 16.182 \n\n measurement_14 measurement_15 measurement_16 measurement_17 failure \n0 NaN 13.034 14.684 764.100 0 \n1 15.425 14.395 15.631 682.057 0 \n2 18.631 14.094 17.946 663.376 0 \n3 15.562 16.154 17.172 826.282 0 \n4 12.760 13.153 16.412 579.885 0 \n\n[5 rows x 26 columns]","text/html":"
\n | id | \nproduct_code | \nloading | \nattribute_0 | \nattribute_1 | \nattribute_2 | \nattribute_3 | \nmeasurement_0 | \nmeasurement_1 | \nmeasurement_2 | \n... | \nmeasurement_9 | \nmeasurement_10 | \nmeasurement_11 | \nmeasurement_12 | \nmeasurement_13 | \nmeasurement_14 | \nmeasurement_15 | \nmeasurement_16 | \nmeasurement_17 | \nfailure | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n0 | \nA | \n80.10 | \nmaterial_7 | \nmaterial_8 | \n9 | \n5 | \n7 | \n8 | \n4 | \n... | \n10.672 | \n15.859 | \n17.594 | \n15.193 | \n15.029 | \nNaN | \n13.034 | \n14.684 | \n764.100 | \n0 | \n
1 | \n1 | \nA | \n84.89 | \nmaterial_7 | \nmaterial_8 | \n9 | \n5 | \n14 | \n3 | \n3 | \n... | \n12.448 | \n17.947 | \n17.915 | \n11.755 | \n14.732 | \n15.425 | \n14.395 | \n15.631 | \n682.057 | \n0 | \n
2 | \n2 | \nA | \n82.43 | \nmaterial_7 | \nmaterial_8 | \n9 | \n5 | \n12 | \n1 | \n5 | \n... | \n12.715 | \n15.607 | \nNaN | \n13.798 | \n16.711 | \n18.631 | \n14.094 | \n17.946 | \n663.376 | \n0 | \n
3 | \n3 | \nA | \n101.07 | \nmaterial_7 | \nmaterial_8 | \n9 | \n5 | \n13 | \n2 | \n6 | \n... | \n12.471 | \n16.346 | \n18.377 | \n10.020 | \n15.250 | \n15.562 | \n16.154 | \n17.172 | \n826.282 | \n0 | \n
4 | \n4 | \nA | \n188.06 | \nmaterial_7 | \nmaterial_8 | \n9 | \n5 | \n9 | \n2 | \n8 | \n... | \n10.337 | \n17.082 | \n19.932 | \n12.428 | \n16.182 | \n12.760 | \n13.153 | \n16.412 | \n579.885 | \n0 | \n
5 rows × 26 columns
\n\n | id | \nloading | \nattribute_2 | \nattribute_3 | \nmeasurement_0 | \nmeasurement_1 | \nmeasurement_2 | \nmeasurement_3 | \nmeasurement_4 | \nmeasurement_5 | \n... | \nmeasurement_9 | \nmeasurement_10 | \nmeasurement_11 | \nmeasurement_12 | \nmeasurement_13 | \nmeasurement_14 | \nmeasurement_15 | \nmeasurement_16 | \nmeasurement_17 | \nfailure | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n26570.000000 | \n26320.000000 | \n26570.000000 | \n26570.000000 | \n26570.000000 | \n26570.000000 | \n26570.000000 | \n26189.000000 | \n26032.000000 | \n25894.000000 | \n... | \n25343.000000 | \n25270.000000 | \n25102.000000 | \n24969.000000 | \n24796.000000 | \n24696.000000 | \n24561.000000 | \n24460.000000 | \n24286.000000 | \n26570.000000 | \n
mean | \n13284.500000 | \n127.826233 | \n6.754046 | \n7.240459 | \n7.415883 | \n8.232518 | \n6.256568 | \n17.791528 | \n11.731988 | \n17.127804 | \n... | \n11.430725 | \n16.117711 | \n19.172085 | \n11.702464 | \n15.652904 | \n16.048444 | \n14.995554 | \n16.460727 | \n701.269059 | \n0.212608 | \n
std | \n7670.242662 | \n39.030020 | \n1.471852 | \n1.456493 | \n4.116690 | \n4.199401 | \n3.309109 | \n1.001200 | \n0.996085 | \n0.996414 | \n... | \n0.999137 | \n1.405978 | \n1.520785 | \n1.488838 | \n1.155247 | \n1.491923 | \n1.549226 | \n1.708935 | \n123.304161 | \n0.409160 | \n
min | \n0.000000 | \n33.160000 | \n5.000000 | \n5.000000 | \n0.000000 | \n0.000000 | \n0.000000 | \n13.968000 | \n8.008000 | \n12.073000 | \n... | \n7.537000 | \n9.323000 | \n12.461000 | \n5.167000 | \n10.890000 | \n9.140000 | \n9.104000 | \n9.701000 | \n196.787000 | \n0.000000 | \n
25% | \n6642.250000 | \n99.987500 | \n6.000000 | \n6.000000 | \n4.000000 | \n5.000000 | \n4.000000 | \n17.117000 | \n11.051000 | \n16.443000 | \n... | \n10.757000 | \n15.209000 | \n18.170000 | \n10.703000 | \n14.890000 | \n15.057000 | \n13.957000 | \n15.268000 | \n618.961500 | \n0.000000 | \n
50% | \n13284.500000 | \n122.390000 | \n6.000000 | \n8.000000 | \n7.000000 | \n8.000000 | \n6.000000 | \n17.787000 | \n11.733000 | \n17.132000 | \n... | \n11.430000 | \n16.127000 | \n19.211500 | \n11.717000 | \n15.628500 | \n16.040000 | \n14.969000 | \n16.436000 | \n701.024500 | \n0.000000 | \n
75% | \n19926.750000 | \n149.152500 | \n8.000000 | \n8.000000 | \n10.000000 | \n11.000000 | \n8.000000 | \n18.469000 | \n12.410000 | \n17.805000 | \n... | \n12.102000 | \n17.025000 | \n20.207000 | \n12.709000 | \n16.374000 | \n17.082000 | \n16.018000 | \n17.628000 | \n784.090250 | \n0.000000 | \n
max | \n26569.000000 | \n385.860000 | \n9.000000 | \n9.000000 | \n29.000000 | \n29.000000 | \n24.000000 | \n21.499000 | \n16.484000 | \n21.425000 | \n... | \n15.412000 | \n22.479000 | \n25.640000 | \n17.663000 | \n22.713000 | \n22.303000 | \n21.626000 | \n24.094000 | \n1312.794000 | \n1.000000 | \n
8 rows × 23 columns
\n