{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "data_path = 'dataset/train/dataframemulticlasse.csv'\n", "test_path = 'dataset/test/dataframemulticlasse_test.csv'\n", "\n", "df_train = pd.read_csv(data_path)\n", "df_test = pd.read_csv(test_path)\n", "\n", "train_data = df_train['text']\n", "train_labels = df_train.label\n", "\n", "test_data = df_test.text\n", "test_labels = df_test.label\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 La possibilità di assistere a parate culturali...\n", "1 Partecipare a workshop di mindfulness o medita...\n", "2 Esplorare parchi divertimenti tematici o attra...\n", "3 La visita a centri di ricerca marina o laborat...\n", "4 Partecipare a corsi di cucina etnica permette ...\n", "Name: text, dtype: object\n", "0 Gustare il gelato artigianale a Firenze, in un...\n", "1 Esplorare i mercati galleggianti della Thailan...\n", "2 Assaggiare il sushi fresco a Tokyo, direttamen...\n", "3 Degustare il formaggio francese in una fromage...\n", "4 Assaporare la paella valenciana in Spagna, con...\n", "Name: text, dtype: object\n", "0 travel and food\n", "1 travel and food\n", "2 travel and food\n", "3 travel and food\n", "4 travel and food\n", " ... \n", "475 cultural activities\n", "476 cultural activities\n", "477 cultural activities\n", "478 cultural activities\n", "479 cultural activities\n", "Name: label, Length: 480, dtype: object\n" ] } ], "source": [ "print(train_data.head())\n", "print(test_data.head())\n", "print(test_labels)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2400\n" ] } ], "source": [ "y_train = []\n", "\n", "for label in train_labels:\n", " if label=='travel and food':\n", " y_train.append([1, 0, 0])\n", " elif label=='shopping':\n", " y_train.append([0, 1, 0])\n", " else:\n", " y_train.append([0, 0, 1])\n", "\n", "print(len(y_train))\n", "\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "480\n" ] } ], "source": [ "y_test = []\n", "\n", "for label in test_labels:\n", " if label=='travel and food':\n", " y_test.append([1, 0, 0])\n", " elif label=='shopping':\n", " y_test.append([0, 1, 0])\n", " else:\n", " y_test.append([0, 0, 1])\n", "\n", "print(len(y_test))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 Gustare il gelato artigianale a Firenze, in un...\n", "1 Esplorare i mercati galleggianti della Thailan...\n", "2 Assaggiare il sushi fresco a Tokyo, direttamen...\n", "3 Degustare il formaggio francese in una fromage...\n", "4 Assaporare la paella valenciana in Spagna, con...\n", "Name: text, dtype: object" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data.head()\n", "test_data.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "#Pre-processamento dei dati il metodo riceve in input una stringa e ne restituisce il suo pre-processamento\n", "from nltk.tokenize import word_tokenize\n", "from nltk.corpus import stopwords\n", "from nltk.stem import WordNetLemmatizer\n", "import string\n", "\n", "def preprocess_text(text):\n", " #Lower text\n", " tokens = word_tokenize(text.lower())\n", " #Rimozione stop words\n", " filtered_tokens = [token for token in tokens if token not in stopwords.words('italian')]\n", " #Lemmatizzazione\n", " lemmatizer = WordNetLemmatizer()\n", " lemmatized_tokens = [lemmatizer.lemmatize(token) for token in filtered_tokens]\n", " #Join lemmatizzazione del testo\n", " processed_text = ' '.join(lemmatized_tokens)\n", " #Eliminazione punteggiatura\n", " return processed_text.translate(str.maketrans('','', string.punctuation))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "x_train = train_data.apply(preprocess_text)\n", "x_test = test_data.apply(preprocess_text)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "possibilità assistere parate culturali sfilate costumi tradizionali offre spettacolo vibrante colorato \n" ] } ], "source": [ "print(x_train[0])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-04-15 10:24:34.934289: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", "2024-04-15 10:24:34.972620: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n", "2024-04-15 10:24:34.972649: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n", "2024-04-15 10:24:34.973617: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n", "2024-04-15 10:24:34.979499: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.\n", "2024-04-15 10:24:34.980270: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-04-15 10:24:35.953169: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Found 3919 unique tokens\n", "Found 4573 unique tokens\n" ] } ], "source": [ "#Vettorizzazione del testo tramite tokenizzazione\n", "from keras.preprocessing.text import Tokenizer\n", "from keras.preprocessing.sequence import pad_sequences\n", "import numpy as np\n", "\n", "maxlen = 50\n", "max_words = 10000\n", "\n", "\n", "#Train\n", "tokenizer = Tokenizer(num_words=max_words)\n", "tokenizer.fit_on_texts(x_train)\n", "sequences = tokenizer.texts_to_sequences(x_train)\n", "word_index = tokenizer.word_index\n", "print('Found %s unique tokens' % len(word_index))\n", "\n", "\n", "x_train = pad_sequences(sequences, maxlen=maxlen)\n", "y_train = np.asarray(y_train)\n", "indices = np.arange(x_train.shape[0])\n", "\n", "np.random.shuffle(indices)\n", "x_train = x_train[indices]\n", "y_train = y_train[indices]\n", "\n", "\n", "#Test\n", "tokenizer.fit_on_texts(x_test)\n", "sequences = tokenizer.texts_to_sequences(x_test)\n", "word_index = tokenizer.word_index\n", "print('Found %s unique tokens' % len(word_index))\n", "\n", "x_test = pad_sequences(sequences, maxlen=maxlen)\n", "y_test = np.asarray(y_test)\n", "indices = np.arange(x_test.shape[0])\n", "\n", "np.random.shuffle(indices)\n", "x_test = x_test[indices]\n", "y_test = y_test[indices]\n", "\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 66 1 665 762 11 664 38 269]\n", "[0 1 0]\n" ] } ], "source": [ "print(x_train[0])\n", "print(y_train[0])" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 154 168 724 843 66 72 132]\n", "[ 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n", " 0 0 0 0 0 0 0 154 168 724 843 66 72 132]\n" ] } ], "source": [ "print(x_test[0])\n", "print(x_test[0])" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n", "50/50 [==============================] - 1s 6ms/step - loss: 57.7213 - acc: 0.3562 - val_loss: 31.4119 - val_acc: 0.4521\n", "Epoch 2/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 19.3110 - acc: 0.3963 - val_loss: 18.6374 - val_acc: 0.2438\n", "Epoch 3/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 13.5844 - acc: 0.4267 - val_loss: 21.8207 - val_acc: 0.2042\n", "Epoch 4/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 9.6498 - acc: 0.4650 - val_loss: 14.9767 - val_acc: 0.4021\n", "Epoch 5/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 7.9024 - acc: 0.4708 - val_loss: 12.5779 - val_acc: 0.3354\n", "Epoch 6/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 6.1975 - acc: 0.5183 - val_loss: 10.9874 - val_acc: 0.3958\n", "Epoch 7/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 5.1058 - acc: 0.5400 - val_loss: 13.4741 - val_acc: 0.2396\n", "Epoch 8/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 4.6071 - acc: 0.5533 - val_loss: 12.6782 - val_acc: 0.2833\n", "Epoch 9/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 5.5174 - acc: 0.5450 - val_loss: 11.8935 - val_acc: 0.3229\n", "Epoch 10/100\n", "50/50 [==============================] - 0s 4ms/step - loss: 4.9561 - acc: 0.5633 - val_loss: 10.2808 - val_acc: 0.2500\n", "Epoch 11/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 3.7697 - acc: 0.5721 - val_loss: 11.3306 - val_acc: 0.2750\n", "Epoch 12/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 3.3814 - acc: 0.5929 - val_loss: 10.8400 - val_acc: 0.3125\n", "Epoch 13/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 3.0572 - acc: 0.6108 - val_loss: 10.3812 - val_acc: 0.1958\n", "Epoch 14/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 3.3121 - acc: 0.6071 - val_loss: 10.8674 - val_acc: 0.1417\n", "Epoch 15/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 3.1069 - acc: 0.6054 - val_loss: 10.6820 - val_acc: 0.2937\n", "Epoch 16/100\n", "50/50 [==============================] - 0s 4ms/step - loss: 2.8333 - acc: 0.6250 - val_loss: 9.2689 - val_acc: 0.3063\n", "Epoch 17/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 3.0605 - acc: 0.6150 - val_loss: 8.6669 - val_acc: 0.3167\n", "Epoch 18/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 2.1812 - acc: 0.6504 - val_loss: 9.9279 - val_acc: 0.2958\n", "Epoch 19/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 2.2207 - acc: 0.6521 - val_loss: 9.3583 - val_acc: 0.2583\n", "Epoch 20/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 2.1138 - acc: 0.6683 - val_loss: 8.7615 - val_acc: 0.2812\n", "Epoch 21/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 2.4420 - acc: 0.6442 - val_loss: 9.2964 - val_acc: 0.2292\n", "Epoch 22/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 2.0800 - acc: 0.6654 - val_loss: 8.8549 - val_acc: 0.2417\n", "Epoch 23/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.6823 - acc: 0.6871 - val_loss: 8.4278 - val_acc: 0.2854\n", "Epoch 24/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.6339 - acc: 0.6829 - val_loss: 8.4131 - val_acc: 0.3458\n", "Epoch 25/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.7133 - acc: 0.7088 - val_loss: 7.5806 - val_acc: 0.3167\n", "Epoch 26/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.6466 - acc: 0.6917 - val_loss: 8.6205 - val_acc: 0.3479\n", "Epoch 27/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.4558 - acc: 0.7163 - val_loss: 8.4144 - val_acc: 0.2333\n", "Epoch 28/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.4888 - acc: 0.7071 - val_loss: 7.8890 - val_acc: 0.3146\n", "Epoch 29/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.6007 - acc: 0.6996 - val_loss: 10.0653 - val_acc: 0.2896\n", "Epoch 30/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.6436 - acc: 0.7067 - val_loss: 8.0090 - val_acc: 0.2000\n", "Epoch 31/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.4242 - acc: 0.7063 - val_loss: 6.8848 - val_acc: 0.3375\n", "Epoch 32/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.4354 - acc: 0.7146 - val_loss: 7.6492 - val_acc: 0.2479\n", "Epoch 33/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.5739 - acc: 0.6988 - val_loss: 8.2795 - val_acc: 0.2625\n", "Epoch 34/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.4290 - acc: 0.6992 - val_loss: 7.7615 - val_acc: 0.3271\n", "Epoch 35/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.5479 - acc: 0.7050 - val_loss: 7.4564 - val_acc: 0.2896\n", "Epoch 36/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.0855 - acc: 0.7471 - val_loss: 7.2132 - val_acc: 0.3146\n", "Epoch 37/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.6617 - acc: 0.6938 - val_loss: 8.0104 - val_acc: 0.2833\n", "Epoch 38/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.2275 - acc: 0.7396 - val_loss: 7.7752 - val_acc: 0.3125\n", "Epoch 39/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.2972 - acc: 0.7125 - val_loss: 7.7986 - val_acc: 0.2562\n", "Epoch 40/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.0149 - acc: 0.7533 - val_loss: 6.6135 - val_acc: 0.2896\n", "Epoch 41/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.8835 - acc: 0.7708 - val_loss: 6.3778 - val_acc: 0.3187\n", "Epoch 42/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.1054 - acc: 0.7408 - val_loss: 7.7386 - val_acc: 0.3479\n", "Epoch 43/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.2514 - acc: 0.7400 - val_loss: 6.4279 - val_acc: 0.3417\n", "Epoch 44/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.1826 - acc: 0.7454 - val_loss: 6.5688 - val_acc: 0.2917\n", "Epoch 45/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.4454 - acc: 0.7258 - val_loss: 8.4988 - val_acc: 0.2646\n", "Epoch 46/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.0288 - acc: 0.7567 - val_loss: 6.3825 - val_acc: 0.2458\n", "Epoch 47/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.1869 - acc: 0.7421 - val_loss: 6.7040 - val_acc: 0.3375\n", "Epoch 48/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.2335 - acc: 0.7483 - val_loss: 6.7427 - val_acc: 0.2521\n", "Epoch 49/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.1742 - acc: 0.7371 - val_loss: 7.0903 - val_acc: 0.2708\n", "Epoch 50/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.7814 - acc: 0.7821 - val_loss: 7.2528 - val_acc: 0.2396\n", "Epoch 51/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.9336 - acc: 0.7625 - val_loss: 6.6921 - val_acc: 0.3104\n", "Epoch 52/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.8222 - acc: 0.7783 - val_loss: 6.9456 - val_acc: 0.2854\n", "Epoch 53/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.0307 - acc: 0.7654 - val_loss: 7.0629 - val_acc: 0.2917\n", "Epoch 54/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.4961 - acc: 0.7212 - val_loss: 7.4154 - val_acc: 0.2729\n", "Epoch 55/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.0491 - acc: 0.7617 - val_loss: 7.5476 - val_acc: 0.2292\n", "Epoch 56/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.7880 - acc: 0.7754 - val_loss: 6.0720 - val_acc: 0.3167\n", "Epoch 57/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6842 - acc: 0.8167 - val_loss: 6.3827 - val_acc: 0.2750\n", "Epoch 58/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.8157 - acc: 0.7850 - val_loss: 5.8507 - val_acc: 0.2812\n", "Epoch 59/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6714 - acc: 0.8021 - val_loss: 6.4812 - val_acc: 0.3250\n", "Epoch 60/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.8255 - acc: 0.7862 - val_loss: 6.6680 - val_acc: 0.3646\n", "Epoch 61/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6928 - acc: 0.7942 - val_loss: 6.8407 - val_acc: 0.3688\n", "Epoch 62/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.7837 - acc: 0.7887 - val_loss: 6.4081 - val_acc: 0.2708\n", "Epoch 63/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.7488 - acc: 0.7954 - val_loss: 6.7094 - val_acc: 0.2250\n", "Epoch 64/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.8052 - acc: 0.7829 - val_loss: 6.0939 - val_acc: 0.3542\n", "Epoch 65/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.8356 - acc: 0.7812 - val_loss: 6.5050 - val_acc: 0.2458\n", "Epoch 66/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6118 - acc: 0.8133 - val_loss: 6.2517 - val_acc: 0.2646\n", "Epoch 67/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5539 - acc: 0.8221 - val_loss: 6.7414 - val_acc: 0.2146\n", "Epoch 68/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6828 - acc: 0.7983 - val_loss: 6.0812 - val_acc: 0.2958\n", "Epoch 69/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.7392 - acc: 0.7958 - val_loss: 6.1747 - val_acc: 0.3146\n", "Epoch 70/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6996 - acc: 0.7971 - val_loss: 6.8373 - val_acc: 0.2375\n", "Epoch 71/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6457 - acc: 0.8025 - val_loss: 6.2975 - val_acc: 0.2896\n", "Epoch 72/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5665 - acc: 0.8292 - val_loss: 6.5490 - val_acc: 0.3187\n", "Epoch 73/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6337 - acc: 0.8142 - val_loss: 6.1048 - val_acc: 0.2208\n", "Epoch 74/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.8613 - acc: 0.7821 - val_loss: 5.5585 - val_acc: 0.2771\n", "Epoch 75/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.7536 - acc: 0.7979 - val_loss: 6.4135 - val_acc: 0.2333\n", "Epoch 76/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5829 - acc: 0.8146 - val_loss: 5.3976 - val_acc: 0.2771\n", "Epoch 77/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5517 - acc: 0.8221 - val_loss: 6.4450 - val_acc: 0.3021\n", "Epoch 78/100\n", "50/50 [==============================] - 0s 4ms/step - loss: 0.7454 - acc: 0.7896 - val_loss: 5.6916 - val_acc: 0.3500\n", "Epoch 79/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.9174 - acc: 0.7792 - val_loss: 6.2204 - val_acc: 0.3000\n", "Epoch 80/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6210 - acc: 0.8254 - val_loss: 5.8649 - val_acc: 0.2958\n", "Epoch 81/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5516 - acc: 0.8333 - val_loss: 5.6621 - val_acc: 0.2958\n", "Epoch 82/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5606 - acc: 0.8333 - val_loss: 5.6664 - val_acc: 0.2937\n", "Epoch 83/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5652 - acc: 0.8213 - val_loss: 5.7887 - val_acc: 0.3125\n", "Epoch 84/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5395 - acc: 0.8325 - val_loss: 5.8723 - val_acc: 0.2625\n", "Epoch 85/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.4743 - acc: 0.8421 - val_loss: 6.0312 - val_acc: 0.2708\n", "Epoch 86/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.4531 - acc: 0.8567 - val_loss: 5.8157 - val_acc: 0.2542\n", "Epoch 87/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.7331 - acc: 0.7979 - val_loss: 6.1701 - val_acc: 0.3000\n", "Epoch 88/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.0382 - acc: 0.7450 - val_loss: 6.6944 - val_acc: 0.3479\n", "Epoch 89/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.7748 - acc: 0.7829 - val_loss: 5.9408 - val_acc: 0.2896\n", "Epoch 90/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6449 - acc: 0.8087 - val_loss: 6.6202 - val_acc: 0.2625\n", "Epoch 91/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.7784 - acc: 0.7817 - val_loss: 6.6542 - val_acc: 0.2292\n", "Epoch 92/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 1.0009 - acc: 0.7575 - val_loss: 6.3484 - val_acc: 0.2313\n", "Epoch 93/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.6815 - acc: 0.8037 - val_loss: 5.1077 - val_acc: 0.2896\n", "Epoch 94/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5399 - acc: 0.8288 - val_loss: 5.1831 - val_acc: 0.2854\n", "Epoch 95/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5971 - acc: 0.8304 - val_loss: 5.9116 - val_acc: 0.2208\n", "Epoch 96/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.5117 - acc: 0.8450 - val_loss: 4.9103 - val_acc: 0.3417\n", "Epoch 97/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.3914 - acc: 0.8754 - val_loss: 5.0447 - val_acc: 0.2812\n", "Epoch 98/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.3166 - acc: 0.8929 - val_loss: 5.1480 - val_acc: 0.3146\n", "Epoch 99/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.4117 - acc: 0.8658 - val_loss: 5.3243 - val_acc: 0.2875\n", "Epoch 100/100\n", "50/50 [==============================] - 0s 3ms/step - loss: 0.3687 - acc: 0.8788 - val_loss: 5.9304 - val_acc: 0.2313\n" ] } ], "source": [ "#Definizione del modello e training con/senza word embedding\n", "\n", "from keras.models import Sequential\n", "from keras.layers import Dense\n", "\n", "model = Sequential()\n", "\n", "# model.add(Dense(256, activation='relu'))\n", "model.add(Dense(256, activation='relu'))\n", "model.add(Dense(64, activation='relu'))\n", "model.add(Dense(3, activation='softmax'))\n", "\n", "model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])\n", "\n", "history = model.fit(x_train, y_train, epochs=100, batch_size=48, validation_data=(x_test, y_test))" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"sequential_16\"\n", "_________________________________________________________________\n", " Layer (type) Output Shape Param # \n", "=================================================================\n", " dense_43 (Dense) (48, 256) 13056 \n", " \n", " dense_44 (Dense) (48, 64) 16448 \n", " \n", " dense_45 (Dense) (48, 3) 195 \n", " \n", "=================================================================\n", "Total params: 29699 (116.01 KB)\n", "Trainable params: 29699 (116.01 KB)\n", "Non-trainable params: 0 (0.00 Byte)\n", "_________________________________________________________________\n" ] } ], "source": [ "model.summary()" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Tracciamento dei risultati\n", "import matplotlib.pyplot as plt\n", "\n", "acc = history.history['acc']\n", "val_acc = history.history['val_acc']\n", "loss = history.history['loss']\n", "val_loss = history.history['val_loss']\n", "\n", "epochs = range(1, len(acc) + 1)\n", "\n", "plt.plot(epochs, acc, 'bo', label='Training acc')\n", "plt.plot(epochs, val_acc, 'b', label='Validation acc')\n", "plt.title('Training and validation accuracy')\n", "plt.legend()\n", "\n", "plt.figure()\n", "\n", "plt.plot(epochs, loss, 'bo', label='Training loss')\n", "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n", "plt.title('Training and validation loss')\n", "plt.legend()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [], "source": [ "#Salvataggio del modello\n", "\n", "model.save('multiclassification.keras')" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/diceglieg/anaconda3/lib/python3.11/site-packages/keras/src/engine/training.py:3103: UserWarning: You are saving your model as an HDF5 file via `model.save()`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')`.\n", " saving_api.save_model(\n" ] } ], "source": [ "model.save('multiclassification.h5')" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1/1 [==============================] - 0s 52ms/step\n", "Travel and food; Shopping; Cultural activities\n", "[array([77.570816 , 0.2972364, 22.13194 ], dtype=float32)]\n" ] } ], "source": [ "#Test\n", "\n", "#load model\n", "from keras.models import load_model\n", "from keras.preprocessing.sequence import pad_sequences\n", "\n", "loaded_model = load_model('multiclassification.h5')\n", "\n", "sentence = input(\"Enter the sentence: \")\n", "sequence = preprocess_text(sentence)\n", "sequence = tokenizer.texts_to_sequences([sequence])\n", "test = pad_sequences(sequence, maxlen=50)\n", "yhat = loaded_model.predict(test)\n", "\n", "percentages = [pred * 100 for pred in yhat]\n", "\n", "#Provare il curry piccante in India, con una miriade di spezie che esplodono in bocca.\n", "print(\"Travel and food; Shopping; Cultural activities\")\n", "print([out for out in percentages])\n" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "030e154a198946ce9c8bd734085ddaa5", "version_major": 2, "version_minor": 0 }, "text/plain": [ "VBox(children=(HTML(value='
4\u001b[0m push_to_hub_callback \u001b[39m=\u001b[39m PushToHubCallback(\n\u001b[1;32m 5\u001b[0m output_dir\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m/home/diceglieg/NextGenTech/projects/ngt learning/multiclass classification\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 6\u001b[0m tokenizer\u001b[39m=\u001b[39mtokenizer,\n\u001b[1;32m 7\u001b[0m hub_model_id\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mgirolamodiceglie/multiclass-classification\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 8\u001b[0m )\n", "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/transformers/keras_callbacks.py:343\u001b[0m, in \u001b[0;36mPushToHubCallback.__init__\u001b[0;34m(self, output_dir, save_strategy, save_steps, tokenizer, hub_model_id, hub_token, checkpoint, **model_card_args)\u001b[0m\n\u001b[1;32m 340\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhub_model_id \u001b[39m=\u001b[39m create_repo(repo_id\u001b[39m=\u001b[39mhub_model_id, exist_ok\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m, token\u001b[39m=\u001b[39mhub_token)\u001b[39m.\u001b[39mrepo_id\n\u001b[1;32m 342\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39moutput_dir \u001b[39m=\u001b[39m output_dir\n\u001b[0;32m--> 343\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrepo \u001b[39m=\u001b[39m Repository(\u001b[39mstr\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39moutput_dir), clone_from\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhub_model_id, token\u001b[39m=\u001b[39mhub_token)\n\u001b[1;32m 345\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mtokenizer \u001b[39m=\u001b[39m tokenizer\n\u001b[1;32m 346\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlast_job \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m\n", "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[39mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 116\u001b[0m kwargs \u001b[39m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[39m=\u001b[39mfn\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m, has_token\u001b[39m=\u001b[39mhas_token, kwargs\u001b[39m=\u001b[39mkwargs)\n\u001b[0;32m--> 118\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/huggingface_hub/repository.py:516\u001b[0m, in \u001b[0;36mRepository.__init__\u001b[0;34m(self, local_dir, clone_from, repo_type, token, git_user, git_email, revision, skip_lfs_files, client)\u001b[0m\n\u001b[1;32m 513\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mhuggingface_token \u001b[39m=\u001b[39m HfFolder\u001b[39m.\u001b[39mget_token()\n\u001b[1;32m 515\u001b[0m \u001b[39mif\u001b[39;00m clone_from \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m--> 516\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mclone_from(repo_url\u001b[39m=\u001b[39mclone_from)\n\u001b[1;32m 517\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 518\u001b[0m \u001b[39mif\u001b[39;00m is_git_repo(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir):\n", "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/huggingface_hub/utils/_validators.py:118\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[39mif\u001b[39;00m check_use_auth_token:\n\u001b[1;32m 116\u001b[0m kwargs \u001b[39m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[39m=\u001b[39mfn\u001b[39m.\u001b[39m\u001b[39m__name__\u001b[39m, has_token\u001b[39m=\u001b[39mhas_token, kwargs\u001b[39m=\u001b[39mkwargs)\n\u001b[0;32m--> 118\u001b[0m \u001b[39mreturn\u001b[39;00m fn(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n", "File \u001b[0;32m~/anaconda3/lib/python3.11/site-packages/huggingface_hub/repository.py:680\u001b[0m, in \u001b[0;36mRepository.clone_from\u001b[0;34m(self, repo_url, token)\u001b[0m\n\u001b[1;32m 677\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[1;32m 678\u001b[0m \u001b[39m# Check if the folder is the root of a git repository\u001b[39;00m\n\u001b[1;32m 679\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_git_repo(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir):\n\u001b[0;32m--> 680\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mEnvironmentError\u001b[39;00m(\n\u001b[1;32m 681\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mTried to clone a repository in a non-empty folder that isn\u001b[39m\u001b[39m'\u001b[39m\u001b[39mt\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 682\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m a git repository (\u001b[39m\u001b[39m'\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir\u001b[39m}\u001b[39;00m\u001b[39m'\u001b[39m\u001b[39m). If you really want to\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 683\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m do this, do it manually:\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m cd \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir\u001b[39m}\u001b[39;00m\u001b[39m && git init\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 684\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m && git remote add origin && git pull origin main\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m or clone\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 685\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m repo to a new folder and move your existing files there\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 686\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m afterwards.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 687\u001b[0m )\n\u001b[1;32m 689\u001b[0m \u001b[39mif\u001b[39;00m is_local_clone(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir, repo_url):\n\u001b[1;32m 690\u001b[0m logger\u001b[39m.\u001b[39mwarning(\n\u001b[1;32m 691\u001b[0m \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mlocal_dir\u001b[39m}\u001b[39;00m\u001b[39m is already a clone of \u001b[39m\u001b[39m{\u001b[39;00mclean_repo_url\u001b[39m}\u001b[39;00m\u001b[39m.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 692\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m Make sure you pull the latest changes with\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 693\u001b[0m \u001b[39m\"\u001b[39m\u001b[39m `repo.git_pull()`.\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 694\u001b[0m )\n", "\u001b[0;31mOSError\u001b[0m: Tried to clone a repository in a non-empty folder that isn't a git repository ('/home/diceglieg/NextGenTech/projects/ngt learning/multiclass classification'). If you really want to do this, do it manually:\n cd /home/diceglieg/NextGenTech/projects/ngt learning/multiclass classification && git init && git remote add origin && git pull origin main\n or clone repo to a new folder and move your existing files there afterwards." ] } ], "source": [ "from transformers import PushToHubCallback\n", "\n", "\n", "push_to_hub_callback = PushToHubCallback(\n", " output_dir=\"/home/diceglieg/NextGenTech/projects/ngt learning/multiclass classification/\",\n", " tokenizer=tokenizer,\n", " hub_model_id=\"girolamodiceglie/multiclass-classification\",\n", ")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }