Joshua1808 committed on
Commit
4913bb4
1 Parent(s): a1ace41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -24
app.py CHANGED
@@ -6,11 +6,13 @@ import numpy as np
6
  import pysentimiento
7
  import geopy
8
  import matplotlib.pyplot as plt
 
9
 
10
 
11
  from pysentimiento.preprocessing import preprocess_tweet
12
  from geopy.geocoders import Nominatim
13
  from transformers import pipeline
 
14
 
15
 
16
  model_checkpoint = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
@@ -51,7 +53,23 @@ def preprocess(text):
51
  text=re.sub(r"\)","",text)
52
  text=" ".join(text.split())
53
  return text
54
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  def highlight_survived(s):
57
  return ['background-color: red']*len(s) if (s.Sexista == 1) else ['background-color: green']*len(s)
@@ -93,18 +111,21 @@ def analizar_tweets(search_words, number_of_tweets):
93
  if (tweet.full_text.startswith('RT')):
94
  continue
95
  else:
96
- datos = preprocess(tweet.full_text)
97
- if datos == "":
98
- continue
99
- else:
100
- prediction = pipeline_nlp(datos)
101
- for predic in prediction:
102
- etiqueta = {'Tweets': datos,'Prediccion': predic['label'], 'Probabilidad': predic['score']}
103
- result.append(etiqueta)
 
 
 
 
 
 
104
  df = pd.DataFrame(result)
105
- #df['Prediccion'] = np.where( df['Prediccion'] == 'LABEL_1', 'Sexista', 'No Sexista')
106
- #df = df[df["Prediccion"] == 'Sexista']
107
- #df = df[df["Probabilidad"] > 0.5]
108
  if df.empty:
109
  muestra= st.text("No hay tweets Sexistas a analizar")
110
  tabla.append(muestra)
@@ -114,17 +135,6 @@ def analizar_tweets(search_words, number_of_tweets):
114
  df['Probabilidad'] = df['Probabilidad'].apply(lambda x: round(x, 3))
115
  muestra = st.table(df.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
116
  tabla.append(muestra)
117
- #resultado=df.groupby('Prediccion')['Probabilidad'].sum()
118
- #colores=["#aae977","#EE3555"]
119
- #fig, ax = plt.subplots(figsize=(2, 1), subplotpars=None)
120
- #plt.pie(resultado,labels=resultado.index,autopct='%1.1f%%',colors=colores)
121
- #ax.set_title("Porcentajes por Categorias", fontsize=2, fontweight="bold")
122
- #plt.rcParams.update({'font.size':2, 'font.weight':'bold'})
123
- #ax.legend()
124
- # Muestra el gráfico
125
- #plt.show()
126
- #st.set_option('deprecation.showPyplotGlobalUse', False)
127
- #st.pyplot()
128
  except Exception as e:
129
  muestra = st.text(f"La cuenta {search_words} no existe.")
130
  tabla.append(muestra)
@@ -204,7 +214,7 @@ def analizar_frase(frase):
204
  if frase == "":
205
  tabla = st.text("Ingrese una frase")
206
  #st.text("Ingrese una frase")
207
- else:
208
  predictions = pipeline_nlp(frase)
209
  # convierte las predicciones en una lista de diccionarios
210
  data = [{'Texto': frase, 'Prediccion': prediction['label'], 'Probabilidad': prediction['score']} for prediction in predictions]
 
6
  import pysentimiento
7
  import geopy
8
  import matplotlib.pyplot as plt
9
+ import langdetect
10
 
11
 
12
  from pysentimiento.preprocessing import preprocess_tweet
13
  from geopy.geocoders import Nominatim
14
  from transformers import pipeline
15
+ from langdetect import detect
16
 
17
 
18
  model_checkpoint = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
 
53
  text=re.sub(r"\)","",text)
54
  text=" ".join(text.split())
55
  return text
56
+
57
def clean_tweet(tweet):
    """Strip emojis, mentions, URLs, hashtags and special characters.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned text with every run of whitespace collapsed to a single
        space and no leading or trailing whitespace. Returns ``""`` when
        nothing is left after cleaning.
    """
    # Remove emojis: emoticons, symbols & pictographs, transport & map
    # symbols, and regional-indicator (flag) characters.
    tweet = re.sub(
        r'[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF'
        r'\U0001F680-\U0001F6FF\U0001F1E0-\U0001F1FF]',
        '',
        tweet,
    )
    # Remove @mentions.
    tweet = re.sub(r'@\w+', '', tweet)
    # Remove URLs.
    tweet = re.sub(r'http\S+', '', tweet)
    # Remove hashtags.
    tweet = re.sub(r'#\w+', '', tweet)
    # Remove special characters; keep ASCII alphanumerics, spaces, newlines,
    # periods and the Spanish accented letters.
    tweet = re.sub(r'[^a-zA-Z0-9 \náéíóúÁÉÍÓÚñÑ.]', '', tweet)
    # The removals above leave gaps behind. Collapse them so a tweet made
    # only of mentions/hashtags/URLs becomes "" (callers compare against ""
    # to skip empty tweets) instead of a whitespace-only string.
    return " ".join(tweet.split())
73
 
74
  def highlight_survived(s):
75
  return ['background-color: red']*len(s) if (s.Sexista == 1) else ['background-color: green']*len(s)
 
111
  if (tweet.full_text.startswith('RT')):
112
  continue
113
  else:
114
+ text = tweet.full_text
115
+ try:
116
+ language = detect(text)
117
+ if language == 'es':
118
+ datos=clean_tweet(text)
119
+ if datos == "":
120
+ continue
121
+ else:
122
+ prediction = pipeline_nlp(datos)
123
+ for predic in prediction:
124
+ etiqueta = {'Tweets': datos, 'Prediccion': predic['label'], 'Probabilidad': predic['score']}
125
+ result.append(etiqueta)
126
+ except:
127
+ pass
128
  df = pd.DataFrame(result)
 
 
 
129
  if df.empty:
130
  muestra= st.text("No hay tweets Sexistas a analizar")
131
  tabla.append(muestra)
 
135
  df['Probabilidad'] = df['Probabilidad'].apply(lambda x: round(x, 3))
136
  muestra = st.table(df.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
137
  tabla.append(muestra)
 
 
 
 
 
 
 
 
 
 
 
138
  except Exception as e:
139
  muestra = st.text(f"La cuenta {search_words} no existe.")
140
  tabla.append(muestra)
 
214
  if frase == "":
215
  tabla = st.text("Ingrese una frase")
216
  #st.text("Ingrese una frase")
217
+ elif language == 'es':
218
  predictions = pipeline_nlp(frase)
219
  # convierte las predicciones en una lista de diccionarios
220
  data = [{'Texto': frase, 'Prediccion': prediction['label'], 'Probabilidad': prediction['score']} for prediction in predictions]