|
|
|
""" |
|
|
|
Developed by Abdul S.

FA20-BCS-OO1 final app.ipynb

Automatically generated by Colab
|
""" |
|
|
|
import pandas as pd |
|
import numpy as np |
|
import gradio as gr |
|
from TweetNormalizer import normalizeTweet |
|
import seaborn as sns |
|
import matplotlib.pyplot as plt |
|
from transformers import pipeline |
|
|
|
|
|
# Format every float that pandas renders with two decimal places.
pd.options.display.float_format = '{:.2f}'.format

# Pipeline used by predict(); model and tokenizer come from the same
# checkpoint id.
pipe = pipeline(
    model="seek007/taskA-DeBERTa-large-1.0.0",
    tokenizer='seek007/taskA-DeBERTa-large-1.0.0',
)
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict(text=None, fil=None):
    """Detect sarcasm in a single tweet and/or in every tweet of an uploaded file.

    Args:
        text: Tweet text to classify. ``None`` and ``""`` both mean
            "no text supplied".
        fil: Uploaded file, or ``None``. Either an object exposing the file
            path as ``.name`` or a plain path string. The first column of a
            headerless ``.csv`` / ``.xlsx`` / ``.xls`` file is read as the
            tweets.

    Returns:
        A ``(sentiment, df, fig)`` tuple.

        * ``sentiment`` -- ``"Sarcastic"`` / ``"Non Sarcastic"`` for ``text``
          when text is given; otherwise the raw label predicted for the first
          row of the file. When neither input is given it is a prompt message.
        * ``df`` -- the file's tweets with an added ``label`` column, or a
          one-row frame for a text-only call, or ``None``.
        * ``fig`` -- pie chart of sarcastic vs. non-sarcastic rows when a file
          is given, else ``None``.

    Raises:
        ValueError: If the uploaded file is neither CSV nor Excel.
    """
    # Treat "" like None so a blank text field counts as "no text" and is
    # never sent through the normalizer or the model.
    has_text = bool(text)

    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept either an upload object carrying `.name` or a bare path.
        path = getattr(fil, "name", fil)
        suffix_source = path.lower()  # match extensions case-insensitively
        if suffix_source.endswith('.csv'):
            df = pd.read_csv(path, header=None, names=['tweet'], usecols=[0])
        elif suffix_source.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path, header=None, names=['tweet'], usecols=[0])
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        normalized = [normalizeTweet(tweet) for tweet in df['tweet']]
        df['label'] = [result['label'] for result in pipe.predict(normalized)]

        sarcastic_count = (df['label'] == 'sarcastic').sum()
        non_sarcastic_count = (df['label'] == 'non_sarcastic').sum()

        sns.set_style("whitegrid")
        # NOTE(review): figures created through pyplot stay registered until
        # closed; consider closing them once rendered in a long-running app.
        fig, ax = plt.subplots()
        ax.pie(
            [sarcastic_count, non_sarcastic_count],
            explode=(0.1, 0),  # pull the "Sarcastic" wedge out slightly
            labels=['Sarcastic', 'Non-Sarcastic'],
            autopct='%1.1f%%',
            shadow=True,
            startangle=140,
        )
        ax.axis('equal')
        ax.set_title('Sarcastic vs Non-Sarcastic Tweets')

        if not has_text:
            # No free text: surface the first file row's label instead.
            sentiment = df['label'][0]

    if has_text:
        prediction = pipe.predict([normalizeTweet(text)])[0]
        print(prediction)

        sentiment = "Sarcastic" if prediction['label'] == 'sarcastic' else "Non Sarcastic"
        if not fil:
            df = pd.DataFrame([{'tweet': text, 'label': sentiment}])

    return sentiment, df, fig
|
|
|
|
|
|
|
|
|
|
|
# Widgets and interface for the detection tab, which is served by predict().
file_path = gr.File(label="Upload a File")
output = gr.Label(num_top_classes=2, label="Predicted Labels")
detector = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        output,
        gr.DataFrame(headers=['Tweets', 'Labels'], wrap=True),
        gr.Plot(label="Sarcasm Predictor"),
    ],
    title="Sarcasm Predictor",
)
|
|
|
|
|
|
|
|
|
|
|
# Pipeline used by classifyB(); model and tokenizer come from the same
# checkpoint id.
pipe2 = pipeline(
    model="seek007/taskB-bertweet-base-trainer-1.0.0",
    tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0",
)
|
|
|
|
|
|
|
def classifyB(text=None, fil=None):
    """Classify the sarcasm category of a tweet and/or of every tweet in a file.

    Args:
        text: Tweet text to classify. ``None`` and ``""`` both mean
            "no text supplied".
        fil: Uploaded file, or ``None``. Either an object exposing the file
            path as ``.name`` or a plain path string. The first column of a
            headerless ``.csv`` / ``.xlsx`` / ``.xls`` file is read as the
            tweets.

    Returns:
        A ``(sentiment, df, fig)`` tuple.

        * ``sentiment`` -- the label predicted for ``text`` when text is
          given; otherwise the label predicted for the first row of the file.
          When neither input is given it is a prompt message.
        * ``df`` -- the file's tweets with added ``label`` and ``score``
          columns, or a one-row frame for a text-only call, or ``None``.
        * ``fig`` -- count plot of predicted labels when a file is given,
          else ``None``.

    Raises:
        ValueError: If the uploaded file is neither CSV nor Excel.
    """
    # Treat "" like None so a blank text field counts as "no text" and is
    # never sent through the normalizer or the model.
    has_text = bool(text)

    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept either an upload object carrying `.name` or a bare path.
        path = getattr(fil, "name", fil)
        suffix_source = path.lower()  # match extensions case-insensitively
        if suffix_source.endswith('.csv'):
            df = pd.read_csv(path, header=None, names=['tweet'], usecols=[0])
        elif suffix_source.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path, header=None, names=['tweet'], usecols=[0])
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        normalized = [normalizeTweet(tweet) for tweet in df['tweet']]
        # Keep only the two fields shown in the results table.
        results = pd.DataFrame(
            [{"label": item['label'], "score": item['score']} for item in pipe2(normalized)]
        )
        df = pd.concat([df, results], axis=1)

        fig = plt.figure()
        sns.countplot(x='label', data=df, palette='viridis')
        plt.title('Result: Count Plot')
        plt.xlabel('label')
        plt.ylabel('Count')

        if not has_text:
            # No free text: surface the first file row's label instead.
            sentiment = df['label'][0]

    if has_text:
        prediction = pipe2([normalizeTweet(text)])[0]
        sentiment = prediction['label']
        if not fil:
            df = pd.DataFrame(
                [{'tweet': text, 'label': sentiment, "score": prediction['score']}]
            )

    return sentiment, df, fig
|
|
|
|
|
|
|
# Widgets and interface for the classification tab, which is served by
# classifyB().
file_path = gr.File(label="Upload a File")
label = gr.Label(label="Labels")
classifier = gr.Interface(
    fn=classifyB,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        label,
        gr.DataFrame(headers=['Tweets', 'Label', "Score"], wrap=True),
        gr.Plot(label="Sarcasm classifier"),
    ],
    title="Sarcasm Classifier",
)
|
|
|
# Combine the two interfaces into one tabbed app and start serving it with
# share=True.
main = gr.TabbedInterface(
    interface_list=[detector, classifier],
    tab_names=['Analysizer', 'Classifier'],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
)

main.launch(share=True)