Spaces:
Runtime error
Runtime error
Joshua1808
committed on
Commit
•
4913bb4
1
Parent(s):
a1ace41
Update app.py
Browse files
app.py
CHANGED
@@ -6,11 +6,13 @@ import numpy as np
|
|
6 |
import pysentimiento
|
7 |
import geopy
|
8 |
import matplotlib.pyplot as plt
|
|
|
9 |
|
10 |
|
11 |
from pysentimiento.preprocessing import preprocess_tweet
|
12 |
from geopy.geocoders import Nominatim
|
13 |
from transformers import pipeline
|
|
|
14 |
|
15 |
|
16 |
model_checkpoint = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
|
@@ -51,7 +53,23 @@ def preprocess(text):
|
|
51 |
text=re.sub(r"\)","",text)
|
52 |
text=" ".join(text.split())
|
53 |
return text
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
def highlight_survived(s):
    """Build a per-element list of CSS background rules for *s*.

    Every element gets the same rule: red when ``s.Sexista`` equals 1,
    green otherwise. The returned list has ``len(s)`` entries.
    """
    if s.Sexista == 1:
        shade = 'background-color: red'
    else:
        shade = 'background-color: green'
    return [shade] * len(s)
|
@@ -93,18 +111,21 @@ def analizar_tweets(search_words, number_of_tweets):
|
|
93 |
if (tweet.full_text.startswith('RT')):
|
94 |
continue
|
95 |
else:
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
104 |
df = pd.DataFrame(result)
|
105 |
-
#df['Prediccion'] = np.where( df['Prediccion'] == 'LABEL_1', 'Sexista', 'No Sexista')
|
106 |
-
#df = df[df["Prediccion"] == 'Sexista']
|
107 |
-
#df = df[df["Probabilidad"] > 0.5]
|
108 |
if df.empty:
|
109 |
muestra= st.text("No hay tweets Sexistas a analizar")
|
110 |
tabla.append(muestra)
|
@@ -114,17 +135,6 @@ def analizar_tweets(search_words, number_of_tweets):
|
|
114 |
df['Probabilidad'] = df['Probabilidad'].apply(lambda x: round(x, 3))
|
115 |
muestra = st.table(df.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
|
116 |
tabla.append(muestra)
|
117 |
-
#resultado=df.groupby('Prediccion')['Probabilidad'].sum()
|
118 |
-
#colores=["#aae977","#EE3555"]
|
119 |
-
#fig, ax = plt.subplots(figsize=(2, 1), subplotpars=None)
|
120 |
-
#plt.pie(resultado,labels=resultado.index,autopct='%1.1f%%',colors=colores)
|
121 |
-
#ax.set_title("Porcentajes por Categorias", fontsize=2, fontweight="bold")
|
122 |
-
#plt.rcParams.update({'font.size':2, 'font.weight':'bold'})
|
123 |
-
#ax.legend()
|
124 |
-
# Muestra el gráfico
|
125 |
-
#plt.show()
|
126 |
-
#st.set_option('deprecation.showPyplotGlobalUse', False)
|
127 |
-
#st.pyplot()
|
128 |
except Exception as e:
|
129 |
muestra = st.text(f"La cuenta {search_words} no existe.")
|
130 |
tabla.append(muestra)
|
@@ -204,7 +214,7 @@ def analizar_frase(frase):
|
|
204 |
if frase == "":
|
205 |
tabla = st.text("Ingrese una frase")
|
206 |
#st.text("Ingrese una frase")
|
207 |
-
|
208 |
predictions = pipeline_nlp(frase)
|
209 |
# convierte las predicciones en una lista de diccionarios
|
210 |
data = [{'Texto': frase, 'Prediccion': prediction['label'], 'Probabilidad': prediction['score']} for prediction in predictions]
|
|
|
6 |
import pysentimiento
|
7 |
import geopy
|
8 |
import matplotlib.pyplot as plt
|
9 |
+
import langdetect
|
10 |
|
11 |
|
12 |
from pysentimiento.preprocessing import preprocess_tweet
|
13 |
from geopy.geocoders import Nominatim
|
14 |
from transformers import pipeline
|
15 |
+
from langdetect import detect
|
16 |
|
17 |
|
18 |
model_checkpoint = "hackathon-pln-es/twitter_sexismo-finetuned-robertuito-exist2021"
|
|
|
53 |
text=re.sub(r"\)","",text)
|
54 |
text=" ".join(text.split())
|
55 |
return text
|
56 |
+
|
57 |
+
def clean_tweet(tweet):
    """Strip emojis, mentions, URLs, hashtags and stray symbols from a tweet.

    The cleaning steps run in this order:
      1. remove characters in four emoji code-point ranges,
      2. remove ``@mentions``,
      3. remove anything starting with ``http`` up to the next whitespace,
      4. remove ``#hashtags``,
      5. drop every character that is not an ASCII letter or digit, a
         Spanish accented vowel, ``ü``/``Ü``, ``ñ``/``Ñ``, a period or
         whitespace,
      6. collapse runs of whitespace into single spaces and trim the ends.

    Args:
        tweet: Raw tweet text.

    Returns:
        The cleaned text. It is the empty string when nothing but
        mentions, hashtags, URLs, symbols or whitespace was present, so
        callers can test the result against ``""`` to skip such tweets.
    """
    # Emoji ranges: emoticons, symbols & pictographs, transport & map
    # symbols, and regional-indicator (flag) letters.
    tweet = re.sub(
        r'[\U0001F600-\U0001F64F'
        r'\U0001F300-\U0001F5FF'
        r'\U0001F680-\U0001F6FF'
        r'\U0001F1E0-\U0001F1FF]',
        '',
        tweet,
    )
    # Mentions.
    tweet = re.sub(r'@\w+', '', tweet)
    # URLs.
    tweet = re.sub(r'http\S+', '', tweet)
    # Hashtags.
    tweet = re.sub(r'#\w+', '', tweet)
    # Special characters: keep only the whitelisted set. ü/Ü are part of
    # Spanish orthography, so dropping them would corrupt words such as
    # "vergüenza".
    tweet = re.sub(r'[^a-zA-Z0-9 \náéíóúüÁÉÍÓÚÜñÑ.]', '', tweet)
    # The removals above leave gaps behind; normalise whitespace so a tweet
    # with no remaining content becomes exactly "".
    return " ".join(tweet.split())
|
73 |
|
74 |
def highlight_survived(s):
    """Build a per-element list of CSS background rules for *s*.

    Every element gets the same rule: red when ``s.Sexista`` equals 1,
    green otherwise. The returned list has ``len(s)`` entries.
    """
    if s.Sexista == 1:
        shade = 'background-color: red'
    else:
        shade = 'background-color: green'
    return [shade] * len(s)
|
|
|
111 |
if (tweet.full_text.startswith('RT')):
|
112 |
continue
|
113 |
else:
|
114 |
+
text = tweet.full_text
|
115 |
+
try:
|
116 |
+
language = detect(text)
|
117 |
+
if language == 'es':
|
118 |
+
datos=clean_tweet(text)
|
119 |
+
if datos == "":
|
120 |
+
continue
|
121 |
+
else:
|
122 |
+
prediction = pipeline_nlp(datos)
|
123 |
+
for predic in prediction:
|
124 |
+
etiqueta = {'Tweets': datos, 'Prediccion': predic['label'], 'Probabilidad': predic['score']}
|
125 |
+
result.append(etiqueta)
|
126 |
+
except:
|
127 |
+
pass
|
128 |
df = pd.DataFrame(result)
|
|
|
|
|
|
|
129 |
if df.empty:
|
130 |
muestra= st.text("No hay tweets Sexistas a analizar")
|
131 |
tabla.append(muestra)
|
|
|
135 |
df['Probabilidad'] = df['Probabilidad'].apply(lambda x: round(x, 3))
|
136 |
muestra = st.table(df.reset_index(drop=True).head(30).style.applymap(color_survived, subset=['Prediccion']))
|
137 |
tabla.append(muestra)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
except Exception as e:
|
139 |
muestra = st.text(f"La cuenta {search_words} no existe.")
|
140 |
tabla.append(muestra)
|
|
|
214 |
if frase == "":
|
215 |
tabla = st.text("Ingrese una frase")
|
216 |
#st.text("Ingrese una frase")
|
217 |
+
elif language == 'es':
|
218 |
predictions = pipeline_nlp(frase)
|
219 |
# convierte las predicciones en una lista de diccionarios
|
220 |
data = [{'Texto': frase, 'Prediccion': prediction['label'], 'Probabilidad': prediction['score']} for prediction in predictions]
|