diff --git "a/XLM Roberta Sentiment Analysis 512 8.ipynb" "b/XLM Roberta Sentiment Analysis 512 8.ipynb" new file mode 100644--- /dev/null +++ "b/XLM Roberta Sentiment Analysis 512 8.ipynb" @@ -0,0 +1,6317 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true, + "id": "QeLkrw3YcwXk" + }, + "outputs": [], + "source": [ + "%pip install transformers\n", + "%pip install torch\n", + "%pip install pandas\n", + "%pip install scikit-learn\n", + "%pip install datasets\n", + "%pip install evaluate\n", + "%pip install tqdm\n", + "%pip install openpyxl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "PelpmuZhT5E1", + "outputId": "581a84b2-c2fa-41bd-90b9-267715404a15" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mounted at /content/drive\n" + ] + } + ], + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cEpWVgUyd0Xw" + }, + "outputs": [], + "source": [ + "from transformers import AutoTokenizer, AutoModelForMaskedLM, DataCollatorWithPadding, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollator\n", + "from datasets import Dataset, DatasetDict\n", + "import torch\n", + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "import re" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 272, + "referenced_widgets": [ + "b463fdb9f6e84e16b6ab8669ed2184c8", + "590d6b50a1864b77b11bbc0d838efdd8", + "1fcfe520a82a4e74aa602d5f5577c800", + "cbc816ba8e8443caa9b407c8a8a9bbbe", + "95daf36d87d74583b2b0aec60df020cd", + "76c856cc8b4746e28e3b89e89b2545dc", + "90bfb51397904c06a4d701e511118bd5", + "808b60b122d240f5920d3f8a261f7687", + "46f314085db64d3c8813564006706937", + "9c8ab70717e042af995970733bff333b", + "7fbba388113949cfaa15b49a4e3373ad", + "70cb331b2d1f4f28826e2464a9d360fc", + "aeb6a94508984c84a60f2c0f5f2cfe6d", + "feb3279ea02548aaac6888af3156603a", + "2da2a80f5e8e4387a967439458b6455b", + "06455ff833144975b3e33dc128a2e1e7", + "f214b85aaa5a45aebb46dbb4c5e38b68", + "a8f15671f7b84a2fa434a410b567954b", + "473fc6be317645b5b9aea90d9dc9a0ec", + "c45072293e2d4289b334e409a53daa13", + "85b001ad3aef40dbbdeedc2275d27eaf", + "af95e4e60fd444f0b5225bfc580b0bf6", + "f23cd374731144f1ac0d14a65ad33fc7", + "ea9ca8c84d25409d9fd5f74da39e03d2", + "e3a3c23842114d8b86d944a9543be38a", + "7e91bae77abc4c1fa3a69e3d6c6bf21d", + "993e8d054ab84c978c49a5d81139ba30", + "2df2b56d7b0d42d38aeeb6dcc0e95072", + "028c7dfca27c49f99f1e97f5a45dffa3", + "6d4f6438eb3341b1bf690bbdfd2cadc9", + "f7ccc805e6c3424db54d1ec28c620dbc", + "91d68c865e01466d91980ffaf5ef75fa", + "67ed03fe39f94e5ba87f0adef1da4ca6", + "7ef2ad9ed0cc4fb1a032af05f6b6db7f", + "26ce1bea48e84116a3003e443dea982c", + "2892dbd6cfd64b46b25d09d00db02f84", + "1a2dd92799f940b894eeaccefc0af016", + "25d9970aaa2f414c88e4a02acb023fd2", + "c87bd4c1516f4acd9b8d304f5ac11ef5", + "fe24614f1d304ab2b65b6d5e2d2b01d1", + "c2ddec0fb5f04b9ea9dd0e93c1de6d24", + "39afc69cf65f467186b933b21f8e2ff4", + "b950207798d54923874c9afcb69b0493", + "4d5f66e9f2df4ed89628f7cdf919750f" + ] + }, + "id": "2GbEdK5dd5Ty", + "outputId": "77d3eb43-c40e-4f4a-b9df-b1e1ca9d55b8" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_auth.py:94: UserWarning: \n", + "The secret `HF_TOKEN` does not exist in your Colab secrets.\n", + "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n", + "You will be able to reuse this secret in all of your notebooks.\n", + "Please note that authentication is recommended but still optional to access public models or datasets.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b463fdb9f6e84e16b6ab8669ed2184c8", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "tokenizer_config.json: 0%| | 0.00/25.0 [00:00