{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: transformers in /home/divya/.venv/lib/python3.8/site-packages (4.31.0)\n",
"Requirement already satisfied: filelock in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (3.12.2)\n",
"Requirement already satisfied: huggingface-hub<1.0,>=0.14.1 in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (0.16.4)\n",
"Requirement already satisfied: numpy>=1.17 in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (1.24.3)\n",
"Requirement already satisfied: packaging>=20.0 in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (23.1)\n",
"Requirement already satisfied: pyyaml>=5.1 in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (6.0.1)\n",
"Requirement already satisfied: regex!=2019.12.17 in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (2023.6.3)\n",
"Requirement already satisfied: requests in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (2.31.0)\n",
"Requirement already satisfied: tokenizers!=0.11.3,<0.14,>=0.11.1 in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (0.13.3)\n",
"Requirement already satisfied: safetensors>=0.3.1 in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (0.3.2)\n",
"Requirement already satisfied: tqdm>=4.27 in /home/divya/.venv/lib/python3.8/site-packages (from transformers) (4.65.0)\n",
"Requirement already satisfied: fsspec in /home/divya/.venv/lib/python3.8/site-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (2023.6.0)\n",
"Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/divya/.venv/lib/python3.8/site-packages (from huggingface-hub<1.0,>=0.14.1->transformers) (4.7.1)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /home/divya/.venv/lib/python3.8/site-packages (from requests->transformers) (3.1.0)\n",
"Requirement already satisfied: idna<4,>=2.5 in /home/divya/.venv/lib/python3.8/site-packages (from requests->transformers) (3.4)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /home/divya/.venv/lib/python3.8/site-packages (from requests->transformers) (1.26.16)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /home/divya/.venv/lib/python3.8/site-packages (from requests->transformers) (2023.5.7)\n",
"Requirement already satisfied: torch in /home/divya/.venv/lib/python3.8/site-packages (2.0.1)\n",
"Requirement already satisfied: filelock in /home/divya/.venv/lib/python3.8/site-packages (from torch) (3.12.2)\n",
"Requirement already satisfied: typing-extensions in /home/divya/.venv/lib/python3.8/site-packages (from torch) (4.7.1)\n",
"Requirement already satisfied: sympy in /home/divya/.venv/lib/python3.8/site-packages (from torch) (1.12)\n",
"Requirement already satisfied: networkx in /home/divya/.venv/lib/python3.8/site-packages (from torch) (3.1)\n",
"Requirement already satisfied: jinja2 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (3.1.2)\n",
"Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.7.99 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (11.7.99)\n",
"Requirement already satisfied: nvidia-cuda-runtime-cu11==11.7.99 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (11.7.99)\n",
"Requirement already satisfied: nvidia-cuda-cupti-cu11==11.7.101 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (11.7.101)\n",
"Requirement already satisfied: nvidia-cudnn-cu11==8.5.0.96 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (8.5.0.96)\n",
"Requirement already satisfied: nvidia-cublas-cu11==11.10.3.66 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (11.10.3.66)\n",
"Requirement already satisfied: nvidia-cufft-cu11==10.9.0.58 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (10.9.0.58)\n",
"Requirement already satisfied: nvidia-curand-cu11==10.2.10.91 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (10.2.10.91)\n",
"Requirement already satisfied: nvidia-cusolver-cu11==11.4.0.1 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (11.4.0.1)\n",
"Requirement already satisfied: nvidia-cusparse-cu11==11.7.4.91 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (11.7.4.91)\n",
"Requirement already satisfied: nvidia-nccl-cu11==2.14.3 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (2.14.3)\n",
"Requirement already satisfied: nvidia-nvtx-cu11==11.7.91 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (11.7.91)\n",
"Requirement already satisfied: triton==2.0.0 in /home/divya/.venv/lib/python3.8/site-packages (from torch) (2.0.0)\n",
"Requirement already satisfied: setuptools in /home/divya/.venv/lib/python3.8/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (68.0.0)\n",
"Requirement already satisfied: wheel in /home/divya/.venv/lib/python3.8/site-packages (from nvidia-cublas-cu11==11.10.3.66->torch) (0.40.0)\n",
"Requirement already satisfied: cmake in /home/divya/.venv/lib/python3.8/site-packages (from triton==2.0.0->torch) (3.26.4)\n",
"Requirement already satisfied: lit in /home/divya/.venv/lib/python3.8/site-packages (from triton==2.0.0->torch) (16.0.6)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /home/divya/.venv/lib/python3.8/site-packages (from jinja2->torch) (2.1.3)\n",
"Requirement already satisfied: mpmath>=0.19 in /home/divya/.venv/lib/python3.8/site-packages (from sympy->torch) (1.3.0)\n"
]
}
],
"source": [
"# Install the deep-learning dependencies into the kernel's own environment.\n",
"# %pip (unlike !pip) always targets the interpreter that runs this notebook;\n",
"# the versions are pinned to the ones this notebook was last executed with.\n",
"%pip install -q transformers==4.31.0 torch==2.0.1"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: seaborn in /home/divya/.venv/lib/python3.8/site-packages (0.12.2)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.17 in /home/divya/.venv/lib/python3.8/site-packages (from seaborn) (1.24.3)\n",
"Requirement already satisfied: pandas>=0.25 in /home/divya/.venv/lib/python3.8/site-packages (from seaborn) (2.0.2)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.1 in /home/divya/.venv/lib/python3.8/site-packages (from seaborn) (3.7.1)\n",
"Requirement already satisfied: contourpy>=1.0.1 in /home/divya/.venv/lib/python3.8/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.1.0)\n",
"Requirement already satisfied: cycler>=0.10 in /home/divya/.venv/lib/python3.8/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (0.11.0)\n",
"Requirement already satisfied: fonttools>=4.22.0 in /home/divya/.venv/lib/python3.8/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (4.40.0)\n",
"Requirement already satisfied: kiwisolver>=1.0.1 in /home/divya/.venv/lib/python3.8/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (1.4.4)\n",
"Requirement already satisfied: packaging>=20.0 in /home/divya/.venv/lib/python3.8/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (23.1)\n",
"Requirement already satisfied: pillow>=6.2.0 in /home/divya/.venv/lib/python3.8/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (10.0.0)\n",
"Requirement already satisfied: pyparsing>=2.3.1 in /home/divya/.venv/lib/python3.8/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (3.1.0)\n",
"Requirement already satisfied: python-dateutil>=2.7 in /home/divya/.venv/lib/python3.8/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (2.8.2)\n",
"Requirement already satisfied: importlib-resources>=3.2.0 in /home/divya/.venv/lib/python3.8/site-packages (from matplotlib!=3.6.1,>=3.1->seaborn) (5.12.0)\n",
"Requirement already satisfied: pytz>=2020.1 in /home/divya/.venv/lib/python3.8/site-packages (from pandas>=0.25->seaborn) (2023.3)\n",
"Requirement already satisfied: tzdata>=2022.1 in /home/divya/.venv/lib/python3.8/site-packages (from pandas>=0.25->seaborn) (2023.3)\n",
"Requirement already satisfied: zipp>=3.1.0 in /home/divya/.venv/lib/python3.8/site-packages (from importlib-resources>=3.2.0->matplotlib!=3.6.1,>=3.1->seaborn) (3.15.0)\n",
"Requirement already satisfied: six>=1.5 in /home/divya/.venv/lib/python3.8/site-packages (from python-dateutil>=2.7->matplotlib!=3.6.1,>=3.1->seaborn) (1.16.0)\n"
]
}
],
"source": [
"# Plotting dependency, pinned to the version this notebook was last executed with.\n",
"%pip install -q seaborn==0.12.2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2023-08-14 03:17:51.971632: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
"2023-08-14 03:17:52.770602: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
]
}
],
"source": [
"# Import the required packages\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import re\n",
"import torch\n",
"import random\n",
"import torch.nn as nn\n",
"import transformers\n",
"from transformers import BertModel, BertTokenizer, AdamW, get_linear_schedule_with_warmup\n",
"import matplotlib.pyplot as plt\n",
"from torch.utils.data import Dataset, DataLoader\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import confusion_matrix, classification_report\n",
"from collections import defaultdict\n",
"import pickle\n",
"from tqdm import tqdm\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Select the compute device.\n",
"# The GPU is pinned *before* the first CUDA query: torch.cuda.is_available() can\n",
"# initialise the CUDA driver, after which changes to CUDA_VISIBLE_DEVICES are\n",
"# no longer picked up. The values match the ones the notebook assigns afterwards.\n",
"import os\n",
"\n",
"os.environ[\"CUDA_DEVICE_ORDER\"] = \"PCI_BUS_ID\"\n",
"os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n",
"\n",
"# Fall back to the CPU when no CUDA device is usable, instead of failing later\n",
"# on the first .to(device) call.\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"device"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"# Set the CUDA device-ordering and device-visibility variables for this process.\n",
"# NOTE(review): presumably this has to run before the first CUDA call so that\n",
"# only GPU 1 (in PCI bus order) is exposed - confirm on the target machine.\n",
"os.environ.update({\n",
"    \"CUDA_DEVICE_ORDER\": \"PCI_BUS_ID\",\n",
"    \"CUDA_VISIBLE_DEVICES\": \"1\",\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Read the fine-food reviews from the CSV file.\n",
"# The original absolute path is kept as the default; set the REVIEWS_CSV\n",
"# environment variable to load the file from another location.\n",
"REVIEWS_CSV = os.environ.get(\n",
"    \"REVIEWS_CSV\", \"/home/divya/vivek5/amazon question answer/Reviews.csv\"\n",
")\n",
"reviews_df = pd.read_csv(REVIEWS_CSV)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" ProductId | \n",
" UserId | \n",
" ProfileName | \n",
" HelpfulnessNumerator | \n",
" HelpfulnessDenominator | \n",
" Score | \n",
" Time | \n",
" Summary | \n",
" Text | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" B001E4KFG0 | \n",
" A3SGXH7AUHU8GW | \n",
" delmartian | \n",
" 1 | \n",
" 1 | \n",
" 5 | \n",
" 1303862400 | \n",
" Good Quality Dog Food | \n",
" I have bought several of the Vitality canned d... | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" B00813GRG4 | \n",
" A1D87F6ZCVE5NK | \n",
" dll pa | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1346976000 | \n",
" Not as Advertised | \n",
" Product arrived labeled as Jumbo Salted Peanut... | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" B000LQOCH0 | \n",
" ABXLMWJIXXAIN | \n",
" Natalia Corres \"Natalia Corres\" | \n",
" 1 | \n",
" 1 | \n",
" 4 | \n",
" 1219017600 | \n",
" \"Delight\" says it all | \n",
" This is a confection that has been around a fe... | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" B000UA0QIQ | \n",
" A395BORC6FGVXV | \n",
" Karl | \n",
" 3 | \n",
" 3 | \n",
" 2 | \n",
" 1307923200 | \n",
" Cough Medicine | \n",
" If you are looking for the secret ingredient i... | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" B006K2ZZ7K | \n",
" A1UQRSCLF8GW1T | \n",
" Michael D. Bigham \"M. Wassir\" | \n",
" 0 | \n",
" 0 | \n",
" 5 | \n",
" 1350777600 | \n",
" Great taffy | \n",
" Great taffy at a great price. There was a wid... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id ProductId UserId ProfileName \\\n",
"0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian \n",
"1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa \n",
"2 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres \"Natalia Corres\" \n",
"3 4 B000UA0QIQ A395BORC6FGVXV Karl \n",
"4 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham \"M. Wassir\" \n",
"\n",
" HelpfulnessNumerator HelpfulnessDenominator Score Time \\\n",
"0 1 1 5 1303862400 \n",
"1 0 0 1 1346976000 \n",
"2 1 1 4 1219017600 \n",
"3 3 3 2 1307923200 \n",
"4 0 0 5 1350777600 \n",
"\n",
" Summary Text \n",
"0 Good Quality Dog Food I have bought several of the Vitality canned d... \n",
"1 Not as Advertised Product arrived labeled as Jumbo Salted Peanut... \n",
"2 \"Delight\" says it all This is a confection that has been around a fe... \n",
"3 Cough Medicine If you are looking for the secret ingredient i... \n",
"4 Great taffy Great taffy at a great price. There was a wid... "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five rows of the loaded review table.\n",
"reviews_df.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"def itemfreq(data):\n",
"    \"\"\"Return the distinct values of ``data`` and how often each one occurs.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : array_like\n",
"        Values to tally; ``np.unique`` flattens multi-dimensional input.\n",
"\n",
"    Returns\n",
"    -------\n",
"    items : numpy.ndarray\n",
"        The sorted unique values.\n",
"    freq : numpy.ndarray\n",
"        ``freq[i]`` is the number of occurrences of ``items[i]``.\n",
"    \"\"\"\n",
"    # Let NumPy count directly instead of rebuilding the counts from the inverse\n",
"    # index with np.bincount, which only accepts a 1-D inverse array.\n",
"    items, freq = np.unique(data, return_counts=True)\n",
"    return items, freq"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([ 18296., 0., 10575., 0., 0., 15624., 0.,\n",
" 29118., 0., 126387.]),\n",
" array([1. , 1.4, 1.8, 2.2, 2.6, 3. , 3.4, 3.8, 4.2, 4.6, 5. ]),\n",
" )"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Distribution of the star ratings. Scores are the integers 1-5, so use one bin\n",
"# per score (edges at the half-integers) instead of the default 10 bins.\n",
"fig, ax = plt.subplots()\n",
"ax.hist(reviews_df.Score, bins=np.arange(0.5, 6.5, 1.0), rwidth=0.8)\n",
"ax.set(\n",
"    title=\"Distribution of review scores\",\n",
"    xlabel=\"Score\",\n",
"    ylabel=\"Number of reviews\",\n",
")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Id | \n",
" ProductId | \n",
" UserId | \n",
" ProfileName | \n",
" HelpfulnessNumerator | \n",
" HelpfulnessDenominator | \n",
" Score | \n",
" Time | \n",
" Summary | \n",
" Text | \n",
" sentiment | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" B001E4KFG0 | \n",
" A3SGXH7AUHU8GW | \n",
" delmartian | \n",
" 1 | \n",
" 1 | \n",
" 5 | \n",
" 1303862400 | \n",
" Good Quality Dog Food | \n",
" I have bought several of the Vitality canned d... | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" B00813GRG4 | \n",
" A1D87F6ZCVE5NK | \n",
" dll pa | \n",
" 0 | \n",
" 0 | \n",
" 1 | \n",
" 1346976000 | \n",
" Not as Advertised | \n",
" Product arrived labeled as Jumbo Salted Peanut... | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Id ProductId UserId ProfileName HelpfulnessNumerator \\\n",
"0 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian 1 \n",
"1 2 B00813GRG4 A1D87F6ZCVE5NK dll pa 0 \n",
"\n",
" HelpfulnessDenominator Score Time Summary \\\n",
"0 1 5 1303862400 Good Quality Dog Food \n",
"1 0 1 1346976000 Not as Advertised \n",
"\n",
" Text sentiment \n",
"0 I have bought several of the Vitality canned d... 2 \n",
"1 Product arrived labeled as Jumbo Salted Peanut... 0 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def get_sentiment(score):\n",
"    \"\"\"Map a review score to a sentiment class id.\n",
"\n",
"    Returns 0 (negative) for scores up to and including 2, 1 (neutral) for a\n",
"    score of exactly 3, and 2 (positive) for every other value.\n",
"    \"\"\"\n",
"    if score == 3:\n",
"        return 1  # neutral sentiment\n",
"    # Anything that is not neutral is either negative (<= 2) or positive.\n",
"    return 0 if score <= 2 else 2\n",
" \n",
"\n",
"# Derive the sentiment class for every review from its score, then preview two rows.\n",
"reviews_df['sentiment'] = reviews_df['Score'].map(get_sentiment)\n",
"reviews_df.head(n=2)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"