{ "base_uri": "https://localhost:8080/" }, "id": "b_0OPx3WukSi", "outputId": "0d205aa3-33b4-4a34-9055-d670cc5ac049" }, "execution_count": 2, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "TensorFlow version: 2.15.0\n", "Keras version: 2.15.0\n" ] } ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "id": "WkzyTQGqzbPS", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9bc0c671-8557-4b3c-a120-0237d7f96253" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "markdown", "source": [ "### Loading the Data ###" ], "metadata": { "id": "BKn5EaROLKeX" } }, { "cell_type": "code", "source": [ "import pandas as pd\n", "\n", "# Load the CSV file in memory\n", "train_path = '/content/drive/MyDrive/dataset/Twitter_Financial_News_Sentiment/train.csv'\n", "test_path = '/content/drive/MyDrive/dataset/Twitter_Financial_News_Sentiment/test.csv'\n", "\n", "train_df = pd.read_csv(train_path, usecols=['text', 'label'])\n", "test_df = pd.read_csv(test_path, usecols=['text', 'label'])" ], "metadata": { "id": "QztIz9VOKLuV" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Show example" ], "metadata": { "id": "hn5ONAwkNeFS" } }, { "cell_type": "code", "source": [ "train_df.head()" ], "metadata": { "id": "zwYzU-dANpJ-" }, "execution_count": null, "outputs": [] }, { "source": [ "#import matplotlib library\n", "from matplotlib import pyplot as plt\n", "\n", "#Histogram of \"Label\" column in train dataset\n", "train_df['label'].plot(kind='hist', title='Label')\n", "plt.gca().spines[['top', 'right']].set_visible(False)" ], "cell_type": "code", "execution_count": null, "outputs": [], "metadata": { "id": "2M1XLsAeN2GN" } }, { "cell_type": "code", "source": [ "test_df.head()" ], "metadata": { "id": "g5_oGvo1NvON" }, "execution_count": null, "outputs": [] }, { 
"cell_type": "code", "source": [ "# Print the shape of the datasets\n", "print(f'train_df shape: {train_df.shape}')\n", "print(f'test_df shape: {test_df.shape}')" ], "metadata": { "id": "kCFupI1FQlMF" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "### Removing the Special Characters ###" ], "metadata": { "id": "zRcmc15aSNx6" } }, { "cell_type": "code", "source": [ "\n", "!pip install text_hammer\n", "\n", "import text_hammer as th\n", "\n", "def text_proccessing(df, col_name):\n", " \"\"\"\n", " Process text data in a DataFrame column by performing the following operations:\n", "\n", " 1. Convert text to lowercase.\n", " 2. Remove emails from the text.\n", " 3. Remove accented characters from the text.\n", " 4. ETA: 10:10 - loss: 0.9140 - sparse_categorical_accuracy: 0.6261" ] } ] }, { "cell_type": "markdown", "source": [ "#### TESTING PHASE\n", "In this phase we use the trained model to make predictions on the test set." ], "metadata": { "id": "hgiDVRwSBtCN" } }, { "cell_type": "code", "source": [ "x_test = tokenizer(\n", " text = test_df.text.tolist(),\n", " padding= True,\n", " max_length= 36,\n", " truncation = True,\n", " return_tensors= 'tf')" ], "metadata": { "id": "xaKYd2PRBySe" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "y_test = test_df.label.values\n", "y_test" ], "metadata": { "id": "OpvHTg3atflb" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "predicted = model.predict({'input_ids':x_test['input_ids'],'attention_mask':x_test['attention_mask']})" ], "metadata": { "id": "nWgCdpKvCSWm" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "import numpy as np\n", "from sklearn.metrics import confusion_matrix\n", "import seaborn as sns\n", "\n", "# The model is trained with a sparse categorical metric (multi-class, integer\n", "# labels), so the predicted label is the index of the highest class score.\n", "# Rounding only the first score would yield just 0/1 and ignore the other classes.\n", "# NOTE(review): assumes `predicted` has shape (n_samples, n_classes) - confirm against the model head.\n", "y_pred = np.argmax(predicted, axis=-1)\n", "\n", "# Generate the confusion matrix (rows: true labels, columns: predicted labels)\n", "cm = confusion_matrix(test_df['label'], y_pred)\n", "\n", "# 
Create a heatmap of the confusion matrix\n", "sns.heatmap(cm, annot=True, fmt=\"d\", cmap=\"Blues\")\n", "plt.xlabel(\"Predicted Label\")\n", "plt.ylabel(\"True Label\")\n", "plt.title(\"Confusion Matrix\")\n", "plt.show()" ], "metadata": { "id": "-BICUoNs_8qI" }, "execution_count": null, "outputs": [] } ] }