girolamodiceglie committed on
Commit
bc21e8a
1 Parent(s): e7c153b
Files changed (1)
  1. sentiment.ipynb +5 -224
sentiment.ipynb CHANGED
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -26,16 +26,6 @@
     "    return processed_text.translate(str.maketrans('','', string.punctuation))"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import nltk\n",
-    "nltk.download()"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -97,171 +87,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
-    "sentence = input(\"Enter the sentence: \")\n",
-    "\n",
-    "preprocess_text(sentence)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "df_train = pd.read_csv('recensioni_train.csv')\n",
-    "df_test = pd.read_csv('recensioni_test.csv')\n",
-    "\n",
-    "X_train = df_train['text'].apply(preprocess_text)\n",
-    "X_test = df_test['text'].apply(preprocess_text)\n",
-    "\n",
-    "tags_train = df_train['tag']\n",
-    "tags_test = df_test['tag']\n",
-    "\n",
-    "y_train = []\n",
-    "y_test = []\n",
-    "\n",
-    "#Train\n",
-    "for e in tags_train:\n",
-    "    if e=='pos':\n",
-    "        y_train.append(1)\n",
-    "    else:\n",
-    "        y_train.append(0)\n",
-    "\n",
-    "#Test\n",
-    "for e in tags_test:\n",
-    "    if e=='pos':\n",
-    "        y_test.append(1)\n",
-    "    else:\n",
-    "        y_test.append(0)\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#######################\n",
-    "\n",
-    "tokenizer_train = Tokenizer(num_words=10000)\n",
-    "tokenizer_train.fit_on_texts(X_train)\n",
-    "sequences_train = tokenizer_train.texts_to_sequences(X_train)\n",
-    "word_index_train = tokenizer_train.word_index\n",
-    "print('Found %s unique tokens' % len(word_index_train))\n",
-    "\n",
     "print(X_train[0])\n",
     "print(y_train[0])\n",
     "\n",
-    "#######################\n",
-    "\n",
-    "tokenizer_test = Tokenizer(num_words=10000)\n",
-    "tokenizer_test.fit_on_texts(X_test)\n",
-    "sequences_test = tokenizer_test.texts_to_sequences(X_test)\n",
-    "word_index_test = tokenizer_test.word_index\n",
-    "print('Found %s unique tokens' % len(word_index_test))\n",
-    "\n",
     "print(X_test[0])\n",
     "print(y_test[0])"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#Dataset NGT\n",
-    "\n",
-    "tokenizer_ngt = Tokenizer(num_words=10000)\n",
-    "tokenizer_ngt.fit_on_texts(X_ngt)\n",
-    "sequences_ngt = tokenizer_ngt.texts_to_sequences(X_ngt)\n",
-    "word_index_ngt = tokenizer_ngt.word_index\n",
-    "print('Found %s unique tokens' % len(word_index_ngt))\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#Dataset NGT\n",
-    "\n",
-    "X_ngt = pad_sequences(sequences_ngt)\n",
-    "y_ngt = np.asarray(y_ngt)\n",
-    "indices_ngt = np.arange(X_ngt.shape[0])\n",
-    "\n",
-    "\n",
-    "np.random.shuffle(indices_ngt)\n",
-    "X_ngt = X_ngt[indices_ngt]\n",
-    "y_ngt = y_ngt[indices_ngt]\n",
-    "\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_ngt, y_ngt, test_size=0.2)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(X_train[0])\n",
-    "print(y_train[0])\n",
-    "\n",
-    "print(X_test[0])\n",
-    "print(y_test[0])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_train = pad_sequences(sequences_train)\n",
-    "y_train = np.asarray(y_train)\n",
-    "indices_train = np.arange(X_train.shape[0])\n",
-    "\n",
-    "\n",
-    "X_test = pad_sequences(sequences_test)\n",
-    "y_test = np.asarray(y_test)\n",
-    "indices_test = np.arange(X_test.shape[0])\n",
-    "\n",
-    "print(indices_train)\n",
-    "print(X_train[0])\n",
-    "print(y_train[0])\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "np.random.shuffle(indices_train)\n",
-    "X_train = X_train[indices_train]\n",
-    "y_train = y_train[indices_train]\n",
-    "\n",
-    "\n",
-    "np.random.shuffle(indices_test)\n",
-    "X_test = X_train[indices_test]\n",
-    "y_test = y_train[indices_test]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "X_train.shape\n",
-    "\n",
-    "print(X_train.dtype)\n"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -277,8 +109,6 @@
     "model.add(Dense(32, activation='relu'))\n",
     "model.add(Dense(1, activation='sigmoid'))\n",
     "\n",
-    "#model.compile(optimizer='SGD', loss='binary_crossentropy', metrics=['acc'])\n",
-    "#model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])\n",
     "model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])\n",
     "\n",
     "history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))"
@@ -331,9 +161,7 @@
    "source": [
     "\n",
     "#Saving the model\n",
-    "\n",
-    "model.save('binary.keras')\n",
-    "\n"
+    "model.save('model.keras')"
    ]
   },
   {
@@ -341,62 +169,15 @@
    "execution_count": null,
    "metadata": {},
    "outputs": [],
-   "source": [
-    "# Dataset ngt\n",
-    "# model.add(Dense(512, activation='relu'))\n",
-    "# model.add(Dense(8, activation='relu'))\n",
-    "# model.add(Dense(1, activation='sigmoid'))\n",
-    "\n",
-    "\n",
-    "# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])\n",
-    "\n",
-    "# history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))\n",
-    "\n",
-    "\n",
-    "# Epoch 10/10\n",
-    "# 100/100 [==============================] - 0s 3ms/step - loss: 0.6099 - acc: 0.6712 - val_loss: 0.6311 - val_acc: 0.6525\n",
-    "\n",
-    "\n",
-    "################################################\n",
-    "\n",
-    "\n",
-    "# Other dataset\n",
-    "# model.add(Dense(512, activation='relu'))\n",
-    "# model.add(Dense(32, activation='relu'))\n",
-    "# model.add(Dense(1, activation='sigmoid'))\n",
-    "\n",
-    "# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])\n",
-    "\n",
-    "# history = model.fit(X_train, y_train, epochs=5, batch_size=32, validation_data=(X_test, y_test))\n",
-    "\n",
-    "# Epoch 5/5\n",
-    "# 63/63 [==============================] - 0s 3ms/step - loss: 0.5344 - acc: 0.7185 - val_loss: 0.5255 - val_acc: 0.7525"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1/1 [==============================] - 0s 51ms/step\n",
-      "NEGATIVO 58 %\n"
-     ]
-    }
-   ],
+   "source": [
     "#Test\n",
     "\n",
-    "#load model\n",
+    "#Load model\n",
     "from keras.models import load_model\n",
     "from keras.preprocessing.sequence import pad_sequences\n",
     "from keras.preprocessing.text import Tokenizer\n",
-    "from keras.preprocessing.text import Tokenizer\n",
     "\n",
-    "loaded_model = load_model('sentiment_dfngt.keras')\n",
+    "loaded_model = load_model('model.keras')\n",
     "\n",
     "sentence = input(\"Enter the sentence: \")\n",
     "sequence = preprocess_text(sentence)\n",
@@ -429,7 +210,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.5"
+   "version": "2.7.18"
   },
   "orig_nbformat": 4
  },
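With the save path (`model.save('model.keras')`) and the load path (`load_model('model.keras')`) now naming the same native-format file, a quick roundtrip check, not part of the commit, can confirm the artifact reloads faithfully:

```python
# Sanity check (not in the notebook): a reloaded .keras model should
# reproduce the original model's predictions.
import numpy as np
from keras.models import load_model

model.save('model.keras')
reloaded = load_model('model.keras')
assert np.allclose(model.predict(X_test[:8]), reloaded.predict(X_test[:8]))
```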
 