{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "d5d0ea64",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>.container { width:95% !important; }</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from IPython.core.display import display, HTML, Image\n",
    "display(HTML(\"<style>.container { width:95% !important; }</style>\"))\n",
    "%config IPCompleter.use_jedi=False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "403c4b8a",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from IPython.display import Markdown, display, HTML, IFrame\n",
    "from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator\n",
    "import base64"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "1c48706a",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('./adult.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b512f166",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 32561 entries, 0 to 32560\n",
      "Data columns (total 15 columns):\n",
      " #   Column          Non-Null Count  Dtype \n",
      "---  ------          --------------  ----- \n",
      " 0   age             32561 non-null  int64 \n",
      " 1   workclass       32561 non-null  object\n",
      " 2   fnlwgt          32561 non-null  int64 \n",
      " 3   education       32561 non-null  object\n",
      " 4   education.num   32561 non-null  int64 \n",
      " 5   marital.status  32561 non-null  object\n",
      " 6   occupation      32561 non-null  object\n",
      " 7   relationship    32561 non-null  object\n",
      " 8   race            32561 non-null  object\n",
      " 9   sex             32561 non-null  object\n",
      " 10  capital.gain    32561 non-null  int64 \n",
      " 11  capital.loss    32561 non-null  int64 \n",
      " 12  hours.per.week  32561 non-null  int64 \n",
      " 13  native.country  32561 non-null  object\n",
      " 14  income          32561 non-null  object\n",
      "dtypes: int64(6), object(9)\n",
      "memory usage: 3.7+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "85b71af5",
   "metadata": {},
   "outputs": [],
   "source": [
    "sprite_size = 32 if len(df.index)>50000 else 64\n",
    "\n",
    "jsonstr = df.to_json(orient='records') \n",
    "HTML_TEMPLATE = \"\"\"\n",
    "        <script src=\"https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js\"></script>\n",
    "        <link rel=\"import\" href=\"https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html\">\n",
    "        <facets-dive sprite-image-width=\"{sprite_size}\" sprite-image-height=\"{sprite_size}\" id=\"elem\" height=\"800\"></facets-dive>\n",
    "        <script>\n",
    "          document.querySelector(\"#elem\").data = {jsonstr};\n",
    "        </script>\"\"\"\n",
    "html = HTML_TEMPLATE.format(jsonstr=jsonstr, sprite_size=sprite_size)\n",
    "with open(\"index.html\",'w') as fo:\n",
    "    fo.write(html)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fce8e9f4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}