diff --git "a/CustomerReviewSentiment/XGBooost.ipynb" "b/CustomerReviewSentiment/XGBooost.ipynb" new file mode 100644--- /dev/null +++ "b/CustomerReviewSentiment/XGBooost.ipynb" @@ -0,0 +1,2820 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Environment setup" + ], + "metadata": { + "id": "5CJSWZ0seqdi" + } + }, + { + "cell_type": "code", + "source": [ + "!npm install vietnamese-stopwords" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "chVA0nDcanqO", + "outputId": "603f1824-4dd1-4eb9-c21c-1ad81a495300" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[K\u001b[?25h\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35msaveError\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m \u001b[0m\u001b[35menoent\u001b[0m ENOENT: no such file or directory, open '/content/package.json'\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No description\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No repository field.\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No README data\n", + "\u001b[0m\u001b[37;40mnpm\u001b[0m \u001b[0m\u001b[30;43mWARN\u001b[0m\u001b[35m\u001b[0m content No license field.\n", + "\u001b[0m\n", + "\u001b[K\u001b[?25h+ vietnamese-stopwords@0.0.2\n", + "updated 1 package and audited 1 package in 0.851s\n", + "found \u001b[92m0\u001b[0m vulnerabilities\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "XGEfwBRWYkXq", + "outputId": "72dba600-dc2e-4899-d541-2d8a7df97a68" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "cd '/content/drive/MyDrive/CustomerReviewSentiment'" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "_sH-b_JWYwVe", + "outputId": "8f14dd56-8326-4321-91c6-978cc59b102b" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "/content/drive/MyDrive/CustomerReviewSentiment\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install sentence_transformers" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yVsu_gxoqhQX", + "outputId": "4362a21f-a065-4761-d79f-826e82fda4d8" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: sentence_transformers in /usr/local/lib/python3.10/dist-packages (2.2.2)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.6.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.35.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (4.66.1)\n", + "Requirement already satisfied: torch>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (2.1.0+cu118)\n", + "Requirement already satisfied: torchvision in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.16.0+cu118)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.23.5)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.2.2)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (1.11.4)\n", + "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (3.8.1)\n", + "Requirement already satisfied: sentencepiece in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.1.99)\n", + "Requirement already satisfied: huggingface-hub>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from sentence_transformers) (0.19.4)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (3.13.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2023.6.0)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (2.31.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (6.0.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (4.5.0)\n", + "Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.4.0->sentence_transformers) (23.2)\n", + "Requirement already satisfied: sympy in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (1.12)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (3.1.2)\n", + "Requirement already satisfied: triton==2.1.0 in /usr/local/lib/python3.10/dist-packages (from torch>=1.6.0->sentence_transformers) (2.1.0)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (2023.6.3)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (0.15.0)\n", + "Requirement already satisfied: safetensors>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from transformers<5.0.0,>=4.6.0->sentence_transformers) (0.4.1)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (8.1.7)\n", + "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from nltk->sentence_transformers) (1.3.2)\n", + "Requirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn->sentence_transformers) (3.2.0)\n", + "Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.10/dist-packages (from torchvision->sentence_transformers) (9.4.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.6.0->sentence_transformers) (2.1.3)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (3.6)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2.0.7)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->huggingface-hub>=0.4.0->sentence_transformers) (2023.11.17)\n", + "Requirement already satisfied: mpmath>=0.19 in /usr/local/lib/python3.10/dist-packages (from sympy->torch>=1.6.0->sentence_transformers) (1.3.0)\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# Data preparation" + ], + "metadata": { + "id": "fmmgYwpCfP6L" + } + }, + { + "cell_type": "code", + "source": [ + "import pandas as pd\n", + "\n", + "data = pd.read_csv('clean_data.csv')" + ], + "metadata": { + "id": "w9XJerFjfWAd" + }, + "execution_count": 12, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "data" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 424 + }, + "id": "WlzOKiMBY1sI", + "outputId": "957bc541-2ff4-4463-f1d3-857d393cc848" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Unnamed: 0 content score thumbsUpCount \\\n", + "0 0 rất thuận tiện , có lợi ích . 5 2 \n", + "1 1 tốt 4 213 \n", + "2 2 ok 5 1 \n", + "3 3 xài rất tốt , nên tải nha 5 8 \n", + "4 4 gútttt 5 1 \n", + "... ... ... ... ... \n", + "310741 99995 ok 5 0 \n", + "310742 99996 g o o d 5 0 \n", + "310743 99997 có 5 0 \n", + "310744 99998 nhiều lúc tôi k lấy đc hiệu ứng 1 0 \n", + "310745 99999 tui chưa chơi ko biết có hay ko . 3 0 \n", + "\n", + " Application \n", + "0 Messenger \n", + "1 Messenger \n", + "2 Messenger \n", + "3 Messenger \n", + "4 Messenger \n", + "... ... \n", + "310741 Instagram \n", + "310742 Instagram \n", + "310743 Instagram \n", + "310744 Instagram \n", + "310745 Instagram \n", + "\n", + "[310746 rows x 5 columns]" + ], + "text/html": [ + "\n", + "
\n", + " | Unnamed: 0 | \n", + "content | \n", + "score | \n", + "thumbsUpCount | \n", + "Application | \n", + "
---|---|---|---|---|---|
0 | \n", + "0 | \n", + "rất thuận tiện , có lợi ích . | \n", + "5 | \n", + "2 | \n", + "Messenger | \n", + "
1 | \n", + "1 | \n", + "tốt | \n", + "4 | \n", + "213 | \n", + "Messenger | \n", + "
2 | \n", + "2 | \n", + "ok | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "
3 | \n", + "3 | \n", + "xài rất tốt , nên tải nha | \n", + "5 | \n", + "8 | \n", + "Messenger | \n", + "
4 | \n", + "4 | \n", + "gútttt | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
310741 | \n", + "99995 | \n", + "ok | \n", + "5 | \n", + "0 | \n", + "|
310742 | \n", + "99996 | \n", + "g o o d | \n", + "5 | \n", + "0 | \n", + "|
310743 | \n", + "99997 | \n", + "có | \n", + "5 | \n", + "0 | \n", + "|
310744 | \n", + "99998 | \n", + "nhiều lúc tôi k lấy đc hiệu ứng | \n", + "1 | \n", + "0 | \n", + "|
310745 | \n", + "99999 | \n", + "tui chưa chơi ko biết có hay ko . | \n", + "3 | \n", + "0 | \n", + "
310746 rows × 5 columns
\n", + "\n", + " | content | \n", + "score | \n", + "thumbsUpCount | \n", + "Application | \n", + "
---|---|---|---|---|
0 | \n", + "rất thuận tiện , có lợi ích . | \n", + "5 | \n", + "2 | \n", + "Messenger | \n", + "
1 | \n", + "tốt | \n", + "4 | \n", + "213 | \n", + "Messenger | \n", + "
2 | \n", + "ok | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "
3 | \n", + "xài rất tốt , nên tải nha | \n", + "5 | \n", + "8 | \n", + "Messenger | \n", + "
4 | \n", + "gútttt | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
310741 | \n", + "ok | \n", + "5 | \n", + "0 | \n", + "|
310742 | \n", + "g o o d | \n", + "5 | \n", + "0 | \n", + "|
310743 | \n", + "có | \n", + "5 | \n", + "0 | \n", + "|
310744 | \n", + "nhiều lúc tôi k lấy đc hiệu ứng | \n", + "1 | \n", + "0 | \n", + "|
310745 | \n", + "tui chưa chơi ko biết có hay ko . | \n", + "3 | \n", + "0 | \n", + "
310746 rows × 4 columns
\n", + "\n", + " | content | \n", + "score | \n", + "thumbsUpCount | \n", + "Application | \n", + "
---|---|---|---|---|
0 | \n", + "thuận tiện lợi ích | \n", + "5 | \n", + "2 | \n", + "Messenger | \n", + "
1 | \n", + "\n", + " | 4 | \n", + "213 | \n", + "Messenger | \n", + "
2 | \n", + "ok | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "
3 | \n", + "xài tải nha | \n", + "5 | \n", + "8 | \n", + "Messenger | \n", + "
4 | \n", + "gútttt | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
995 | \n", + "cập nhật clllll xóa mẹ mess đi | \n", + "1 | \n", + "1 | \n", + "Messenger | \n", + "
996 | \n", + "ứng dụng kém | \n", + "1 | \n", + "0 | \n", + "Messenger | \n", + "
997 | \n", + "tiếng vọng | \n", + "1 | \n", + "0 | \n", + "Messenger | \n", + "
998 | \n", + "ok | \n", + "5 | \n", + "0 | \n", + "Messenger | \n", + "
999 | \n", + "ghi | \n", + "1 | \n", + "0 | \n", + "Messenger | \n", + "
1000 rows × 4 columns
\n", + "\n", + " | content | \n", + "score | \n", + "thumbsUpCount | \n", + "Application | \n", + "feature_0 | \n", + "feature_1 | \n", + "feature_2 | \n", + "feature_3 | \n", + "feature_4 | \n", + "feature_5 | \n", + "... | \n", + "feature_502 | \n", + "feature_503 | \n", + "feature_504 | \n", + "feature_505 | \n", + "feature_506 | \n", + "feature_507 | \n", + "feature_508 | \n", + "feature_509 | \n", + "feature_510 | \n", + "feature_511 | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "thuận tiện lợi ích | \n", + "5 | \n", + "2 | \n", + "Messenger | \n", + "0.007724 | \n", + "-0.029080 | \n", + "0.013690 | \n", + "-0.009045 | \n", + "0.023238 | \n", + "-0.000431 | \n", + "... | \n", + "-0.016954 | \n", + "-0.036230 | \n", + "-0.035321 | \n", + "0.018905 | \n", + "-0.004153 | \n", + "-0.050888 | \n", + "0.026344 | \n", + "-0.036549 | \n", + "0.005494 | \n", + "-0.022665 | \n", + "
1 | \n", + "\n", + " | 4 | \n", + "213 | \n", + "Messenger | \n", + "0.017581 | \n", + "-0.002738 | \n", + "-0.038254 | \n", + "-0.041900 | \n", + "-0.053605 | \n", + "0.027044 | \n", + "... | \n", + "0.016105 | \n", + "-0.003971 | \n", + "0.019439 | \n", + "0.017352 | \n", + "-0.038702 | \n", + "-0.057055 | \n", + "-0.019953 | \n", + "-0.015060 | \n", + "0.021243 | \n", + "-0.026957 | \n", + "
2 | \n", + "ok | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "0.028191 | \n", + "-0.010388 | \n", + "-0.052607 | \n", + "-0.094258 | \n", + "0.026439 | \n", + "0.025329 | \n", + "... | \n", + "0.044415 | \n", + "0.029121 | \n", + "0.023854 | \n", + "0.024050 | \n", + "0.003137 | \n", + "-0.067067 | \n", + "-0.010747 | \n", + "0.015472 | \n", + "-0.007338 | \n", + "-0.064763 | \n", + "
3 | \n", + "xài tải nha | \n", + "5 | \n", + "8 | \n", + "Messenger | \n", + "-0.012584 | \n", + "-0.066111 | \n", + "-0.015364 | \n", + "-0.006006 | \n", + "-0.049788 | \n", + "0.031777 | \n", + "... | \n", + "0.029615 | \n", + "-0.012054 | \n", + "0.038802 | \n", + "0.006297 | \n", + "0.011572 | \n", + "-0.025683 | \n", + "0.032493 | \n", + "-0.004576 | \n", + "0.034961 | \n", + "-0.028953 | \n", + "
4 | \n", + "gútttt | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "0.011783 | \n", + "-0.014584 | \n", + "-0.033242 | \n", + "-0.039972 | \n", + "-0.041038 | \n", + "0.000820 | \n", + "... | \n", + "0.018747 | \n", + "-0.020979 | \n", + "0.011736 | \n", + "0.006028 | \n", + "-0.020612 | \n", + "-0.021013 | \n", + "0.004732 | \n", + "-0.011790 | \n", + "-0.021903 | \n", + "0.000539 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
995 | \n", + "cập nhật clllll xóa mẹ mess đi | \n", + "1 | \n", + "1 | \n", + "Messenger | \n", + "-0.003216 | \n", + "-0.013464 | \n", + "0.004712 | \n", + "0.002270 | \n", + "-0.053654 | \n", + "-0.008947 | \n", + "... | \n", + "0.005085 | \n", + "-0.007261 | \n", + "0.017898 | \n", + "0.018807 | \n", + "-0.013562 | \n", + "-0.015285 | \n", + "-0.032608 | \n", + "0.012056 | \n", + "0.017730 | \n", + "-0.023654 | \n", + "
996 | \n", + "ứng dụng kém | \n", + "1 | \n", + "0 | \n", + "Messenger | \n", + "0.003408 | \n", + "0.011238 | \n", + "-0.003270 | \n", + "0.009822 | \n", + "0.010839 | \n", + "0.013731 | \n", + "... | \n", + "0.005113 | \n", + "-0.001555 | \n", + "0.023239 | \n", + "0.026508 | \n", + "-0.008224 | \n", + "-0.027951 | \n", + "-0.023829 | \n", + "-0.005397 | \n", + "-0.006893 | \n", + "0.011582 | \n", + "
997 | \n", + "tiếng vọng | \n", + "1 | \n", + "0 | \n", + "Messenger | \n", + "0.009905 | \n", + "0.016094 | \n", + "-0.045669 | \n", + "-0.000068 | \n", + "-0.040511 | \n", + "0.034474 | \n", + "... | \n", + "0.018995 | \n", + "-0.012872 | \n", + "0.006261 | \n", + "-0.016766 | \n", + "-0.041759 | \n", + "-0.059727 | \n", + "-0.005747 | \n", + "-0.008027 | \n", + "0.004528 | \n", + "-0.001835 | \n", + "
998 | \n", + "ok | \n", + "5 | \n", + "0 | \n", + "Messenger | \n", + "0.028191 | \n", + "-0.010388 | \n", + "-0.052607 | \n", + "-0.094258 | \n", + "0.026439 | \n", + "0.025329 | \n", + "... | \n", + "0.044415 | \n", + "0.029121 | \n", + "0.023854 | \n", + "0.024050 | \n", + "0.003137 | \n", + "-0.067067 | \n", + "-0.010747 | \n", + "0.015472 | \n", + "-0.007338 | \n", + "-0.064763 | \n", + "
999 | \n", + "ghi | \n", + "1 | \n", + "0 | \n", + "Messenger | \n", + "0.028111 | \n", + "0.017755 | \n", + "-0.039428 | \n", + "-0.035738 | \n", + "-0.039431 | \n", + "0.019438 | \n", + "... | \n", + "0.013316 | \n", + "-0.008019 | \n", + "0.014297 | \n", + "0.013871 | \n", + "-0.029063 | \n", + "-0.038971 | \n", + "-0.016512 | \n", + "-0.027867 | \n", + "0.003646 | \n", + "-0.013748 | \n", + "
1000 rows × 1028 columns
\n", + "\n", + " | content | \n", + "score | \n", + "thumbsUpCount | \n", + "Application | \n", + "feature_0 | \n", + "feature_1 | \n", + "feature_2 | \n", + "feature_3 | \n", + "feature_4 | \n", + "feature_5 | \n", + "... | \n", + "feature_502 | \n", + "feature_503 | \n", + "feature_504 | \n", + "feature_505 | \n", + "feature_506 | \n", + "feature_507 | \n", + "feature_508 | \n", + "feature_509 | \n", + "feature_510 | \n", + "feature_511 | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "thuận tiện lợi ích | \n", + "5 | \n", + "2 | \n", + "Messenger | \n", + "0.007724 | \n", + "-0.029080 | \n", + "0.013690 | \n", + "-0.009045 | \n", + "0.023238 | \n", + "-0.000431 | \n", + "... | \n", + "-0.016954 | \n", + "-0.036230 | \n", + "-0.035321 | \n", + "0.018905 | \n", + "-0.004153 | \n", + "-0.050888 | \n", + "0.026344 | \n", + "-0.036549 | \n", + "0.005494 | \n", + "-0.022665 | \n", + "
1 | \n", + "\n", + " | 4 | \n", + "213 | \n", + "Messenger | \n", + "0.017581 | \n", + "-0.002738 | \n", + "-0.038254 | \n", + "-0.041900 | \n", + "-0.053605 | \n", + "0.027044 | \n", + "... | \n", + "0.016105 | \n", + "-0.003971 | \n", + "0.019439 | \n", + "0.017352 | \n", + "-0.038702 | \n", + "-0.057055 | \n", + "-0.019953 | \n", + "-0.015060 | \n", + "0.021243 | \n", + "-0.026957 | \n", + "
2 | \n", + "ok | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "0.028191 | \n", + "-0.010388 | \n", + "-0.052607 | \n", + "-0.094258 | \n", + "0.026439 | \n", + "0.025329 | \n", + "... | \n", + "0.044415 | \n", + "0.029121 | \n", + "0.023854 | \n", + "0.024050 | \n", + "0.003137 | \n", + "-0.067067 | \n", + "-0.010747 | \n", + "0.015472 | \n", + "-0.007338 | \n", + "-0.064763 | \n", + "
3 | \n", + "xài tải nha | \n", + "5 | \n", + "8 | \n", + "Messenger | \n", + "-0.012584 | \n", + "-0.066111 | \n", + "-0.015364 | \n", + "-0.006006 | \n", + "-0.049788 | \n", + "0.031777 | \n", + "... | \n", + "0.029615 | \n", + "-0.012054 | \n", + "0.038802 | \n", + "0.006297 | \n", + "0.011572 | \n", + "-0.025683 | \n", + "0.032493 | \n", + "-0.004576 | \n", + "0.034961 | \n", + "-0.028953 | \n", + "
4 | \n", + "gútttt | \n", + "5 | \n", + "1 | \n", + "Messenger | \n", + "0.011783 | \n", + "-0.014584 | \n", + "-0.033242 | \n", + "-0.039972 | \n", + "-0.041038 | \n", + "0.000820 | \n", + "... | \n", + "0.018747 | \n", + "-0.020979 | \n", + "0.011736 | \n", + "0.006028 | \n", + "-0.020612 | \n", + "-0.021013 | \n", + "0.004732 | \n", + "-0.011790 | \n", + "-0.021903 | \n", + "0.000539 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
995 | \n", + "cập nhật clllll xóa mẹ mess đi | \n", + "1 | \n", + "1 | \n", + "Messenger | \n", + "-0.003216 | \n", + "-0.013464 | \n", + "0.004712 | \n", + "0.002270 | \n", + "-0.053654 | \n", + "-0.008947 | \n", + "... | \n", + "0.005085 | \n", + "-0.007261 | \n", + "0.017898 | \n", + "0.018807 | \n", + "-0.013562 | \n", + "-0.015285 | \n", + "-0.032608 | \n", + "0.012056 | \n", + "0.017730 | \n", + "-0.023654 | \n", + "
996 | \n", + "ứng dụng kém | \n", + "1 | \n", + "0 | \n", + "Messenger | \n", + "0.003408 | \n", + "0.011238 | \n", + "-0.003270 | \n", + "0.009822 | \n", + "0.010839 | \n", + "0.013731 | \n", + "... | \n", + "0.005113 | \n", + "-0.001555 | \n", + "0.023239 | \n", + "0.026508 | \n", + "-0.008224 | \n", + "-0.027951 | \n", + "-0.023829 | \n", + "-0.005397 | \n", + "-0.006893 | \n", + "0.011582 | \n", + "
997 | \n", + "tiếng vọng | \n", + "1 | \n", + "0 | \n", + "Messenger | \n", + "0.009905 | \n", + "0.016094 | \n", + "-0.045669 | \n", + "-0.000068 | \n", + "-0.040511 | \n", + "0.034474 | \n", + "... | \n", + "0.018995 | \n", + "-0.012872 | \n", + "0.006261 | \n", + "-0.016766 | \n", + "-0.041759 | \n", + "-0.059727 | \n", + "-0.005747 | \n", + "-0.008027 | \n", + "0.004528 | \n", + "-0.001835 | \n", + "
998 | \n", + "ok | \n", + "5 | \n", + "0 | \n", + "Messenger | \n", + "0.028191 | \n", + "-0.010388 | \n", + "-0.052607 | \n", + "-0.094258 | \n", + "0.026439 | \n", + "0.025329 | \n", + "... | \n", + "0.044415 | \n", + "0.029121 | \n", + "0.023854 | \n", + "0.024050 | \n", + "0.003137 | \n", + "-0.067067 | \n", + "-0.010747 | \n", + "0.015472 | \n", + "-0.007338 | \n", + "-0.064763 | \n", + "
999 | \n", + "ghi | \n", + "1 | \n", + "0 | \n", + "Messenger | \n", + "0.028111 | \n", + "0.017755 | \n", + "-0.039428 | \n", + "-0.035738 | \n", + "-0.039431 | \n", + "0.019438 | \n", + "... | \n", + "0.013316 | \n", + "-0.008019 | \n", + "0.014297 | \n", + "0.013871 | \n", + "-0.029063 | \n", + "-0.038971 | \n", + "-0.016512 | \n", + "-0.027867 | \n", + "0.003646 | \n", + "-0.013748 | \n", + "
1000 rows × 516 columns
\n", + "XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.01, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=10, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=1000, n_jobs=None,\n", + " num_parallel_tree=None, objective='multi:softprob', ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=0.01, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=10, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=1000, n_jobs=None,\n", + " num_parallel_tree=None, objective='multi:softprob', ...)