File size: 4,069 Bytes
0ffbac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6de7dc
0ffbac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6de7dc
 
0ffbac7
 
 
b6de7dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ffbac7
 
 
b6de7dc
0ffbac7
 
 
 
b6de7dc
0ffbac7
 
 
 
 
 
 
 
 
 
b6de7dc
0ffbac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d5d0ea64",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>.container { width:95% !important; }</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.core.display import display, HTML, Image\n",
    "display(HTML(\"<style>.container { width:95% !important; }</style>\"))\n",
    "%config IPCompleter.use_jedi=False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "403c4b8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from IPython.display import Markdown, display, HTML, IFrame\n",
    "from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
    "import base64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1c48706a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('./adult.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b512f166",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 32561 entries, 0 to 32560\n",
      "Data columns (total 15 columns):\n",
      " #   Column          Non-Null Count  Dtype \n",
      "---  ------          --------------  ----- \n",
      " 0   age             32561 non-null  int64 \n",
      " 1   workclass       32561 non-null  object\n",
      " 2   fnlwgt          32561 non-null  int64 \n",
      " 3   education       32561 non-null  object\n",
      " 4   education.num   32561 non-null  int64 \n",
      " 5   marital.status  32561 non-null  object\n",
      " 6   occupation      32561 non-null  object\n",
      " 7   relationship    32561 non-null  object\n",
      " 8   race            32561 non-null  object\n",
      " 9   sex             32561 non-null  object\n",
      " 10  capital.gain    32561 non-null  int64 \n",
      " 11  capital.loss    32561 non-null  int64 \n",
      " 12  hours.per.week  32561 non-null  int64 \n",
      " 13  native.country  32561 non-null  object\n",
      " 14  income          32561 non-null  object\n",
      "dtypes: int64(6), object(9)\n",
      "memory usage: 3.7+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "85b71af5",
   "metadata": {},
   "outputs": [],
   "source": [
    "sprite_size = 32 if len(df.index)>50000 else 64\n",
    "\n",
    "jsonstr = df.to_json(orient='records') \n",
    "HTML_TEMPLATE = \"\"\"\n",
    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
    "        <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\">\n",
    "        <facets-dive sprite-image-width=\"{sprite_size}\" sprite-image-height=\"{sprite_size}\" id=\"elem\" height=\"800\"></facets-dive>\n",
    "        <script>\n",
    "          document.querySelector(\"#elem\").data = {jsonstr};\n",
    "        </script>\"\"\"\n",
    "html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)\n",
    "with open(\"index.html\",'w') as fo:\n",
    "    fo.write(html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fce8e9f4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}