Spaces:
Sleeping
Sleeping
danielcd99
committed on
Commit
•
1ba6bc3
1
Parent(s):
e8059ec
added symbolic model
Browse files- app.py +4 -2
- requirements.txt +2 -1
- wordnet.py +80 -0
app.py
CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
|
|
3 |
from preprocess_data import preprocess_text,get_stopwords
|
4 |
from datasets import load_dataset
|
5 |
from transformers import pipeline
|
|
|
6 |
|
7 |
dataset = load_dataset('danielcd99/imdb')
|
8 |
|
@@ -45,9 +46,10 @@ if st.button('Encontre exemplos!'):
|
|
45 |
else:
|
46 |
predictions.append('Positive')
|
47 |
|
48 |
-
df['
|
|
|
49 |
|
50 |
-
cols = ['review','sentiment', '
|
51 |
|
52 |
st.table(df[cols])
|
53 |
|
|
|
3 |
from preprocess_data import preprocess_text,get_stopwords
|
4 |
from datasets import load_dataset
|
5 |
from transformers import pipeline
|
6 |
+
from wordnet import wordnet_pipeline
|
7 |
|
8 |
dataset = load_dataset('danielcd99/imdb')
|
9 |
|
|
|
46 |
else:
|
47 |
predictions.append('Positive')
|
48 |
|
49 |
+
df['bert_results'] = predictions
|
50 |
+
df['wordnet_results'] = wordnet_pipeline(df, 'preprocessed_review')
|
51 |
|
52 |
+
cols = ['review','sentiment', 'bert_results', 'wordnet_results']
|
53 |
|
54 |
st.table(df[cols])
|
55 |
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
nltk
|
2 |
transformers==4.28.0
|
3 |
-
torch
|
|
|
|
1 |
nltk
|
2 |
transformers==4.28.0
|
3 |
+
torch
|
4 |
+
numpy
|
wordnet.py
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import nltk
|
3 |
+
from nltk.corpus import sentiwordnet as swn
|
4 |
+
from nltk.corpus import stopwords
|
5 |
+
|
6 |
+
def flatten(nested):
    """Return one flat list containing the items of every sublist in *nested*.

    Replaces the previous ``flatten = lambda ...`` assignment: PEP 8 (E731)
    says not to bind a lambda to a name — a ``def`` gives a proper
    ``__name__`` and clearer tracebacks, with identical behavior.
    """
    return [item for sublist in nested for item in sublist]

# Map the first two characters of a Penn Treebank POS tag to the
# single-letter POS code SentiWordNet expects.
tagsswn = {
    "NN": "n",  # nouns
    "VB": "v",  # verbs
    "JJ": "a",  # adjectives
    "RB": "r",  # adverbs
}
|
14 |
+
|
15 |
+
def get_sentiment(aval, stopwords):
    """Compute SentiWordNet sentiment scores for a text.

    Args:
        aval (str): Text to analyse.
        stopwords: Collection of lowercase stopwords to exclude from scoring.

    Returns:
        tuple: (summed positive score, summed negative score) over all
        scorable tokens; (0, 0) when nothing could be scored.
    """
    positives = []
    negatives = []

    # Sentence-split, word-tokenize each sentence, then POS-tag the lot
    # and collapse back into one flat (token, tag) sequence.
    tokenized = [nltk.word_tokenize(s) for s in nltk.sent_tokenize(aval)]
    tagged = flatten(nltk.pos_tag_sents(tokenized))

    # Stopwords carry no sentiment signal — drop them before lookup.
    tagged = [pair for pair in tagged if pair[0].lower() not in stopwords]

    for token, tag in tagged:
        # Only nouns/verbs/adjectives/adverbs map into SentiWordNet.
        swn_tag = tagsswn.get(tag[:2])
        if swn_tag is None:
            continue

        senti_synsets = list(swn.senti_synsets(token.lower(), swn_tag))
        if not senti_synsets:
            continue

        # Heuristic: score with the first (most common) synset only.
        first = senti_synsets[0]
        positives.append(first.pos_score())
        negatives.append(first.neg_score())

    total_pos = np.sum(positives) if positives else 0
    total_neg = np.sum(negatives) if negatives else 0

    return total_pos, total_neg
|
52 |
+
|
53 |
+
def classify_sentiment(aval, stopwords):
    """Classify a text as positive or negative from its SentiWordNet scores.

    Args:
        aval (str): Text to classify.
        stopwords: Collection of stopwords forwarded to ``get_sentiment``.

    Returns:
        str: ``"positive"`` when the positive score strictly exceeds the
        negative one, otherwise ``"negative"`` (ties count as negative).
    """
    scores = get_sentiment(aval, stopwords)
    if scores[0] > scores[1]:
        return "positive"
    return "negative"
|
65 |
+
|
66 |
+
|
67 |
+
def wordnet_pipeline(df, column):
    """Label every text in ``df[column]`` with a SentiWordNet sentiment.

    Args:
        df: pandas DataFrame holding the texts to classify.
        column (str): Name of the column containing preprocessed reviews.

    Returns:
        list: One ``"positive"``/``"negative"`` label per row, in order.
    """
    # Fetch every NLTK corpus/model the classification chain relies on
    # (no-ops when already present locally).
    for resource in ('sentiwordnet', 'wordnet', 'stopwords',
                     'punkt', 'averaged_perceptron_tagger'):
        nltk.download(resource)

    english_stopwords = set(stopwords.words("english"))

    return [classify_sentiment(text, english_stopwords) for text in df[column]]
|