File size: 3,388 Bytes
0ffbac7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d5d0ea64",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>.container { width:95% !important; }</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.core.display import display, HTML, Image\n",
    "display(HTML(\"<style>.container { width:95% !important; }</style>\"))\n",
    "%config IPCompleter.use_jedi=False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "403c4b8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from IPython.display import Markdown, display, HTML, IFrame\n",
    "from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
    "import base64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1c48706a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('./adult.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "c000f04d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Private             22696\n",
       "Self-emp-not-inc     2541\n",
       "Local-gov            2093\n",
       "?                    1836\n",
       "State-gov            1298\n",
       "Self-emp-inc         1116\n",
       "Federal-gov           960\n",
       "Without-pay            14\n",
       "Never-worked            7\n",
       "Name: workclass, dtype: int64"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['workclass'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "85b71af5",
   "metadata": {},
   "outputs": [],
   "source": [
    "sprite_size = 32 if len(df.index)>50000 else 64\n",
    "\n",
    "jsonstr = df.to_json(orient='records') \n",
    "HTML_TEMPLATE = \"\"\"\n",
    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
    "        <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\">\n",
    "        <facets-dive sprite-image-width=\"{sprite_size}\" sprite-image-height=\"{sprite_size}\" id=\"elem\" height=\"1200\"></facets-dive>\n",
    "        <script>\n",
    "          document.querySelector(\"#elem\").data = {jsonstr};\n",
    "        </script>\"\"\"\n",
    "html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)\n",
    "with open(\"index.html\",'w') as fo:\n",
    "    fo.write(html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fce8e9f4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}