# -*- coding: utf-8 -*-
"""Gradio app with two tabs: sarcasm detection and sarcasm-type classification.

Developed by Abdul S. FA20-BCS-OO1

final app.ipynb

Automatically generated by Colab
"""

import pandas as pd
import numpy as np
import gradio as gr
from TweetNormalizer import normalizeTweet
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import pipeline

# Set pandas display option to show only 2 decimal places
pd.set_option('display.float_format', '{:.2f}'.format)

# Message returned as the label when neither text nor a file was supplied.
NO_INPUT_MESSAGE = "Either enter text or upload .csv or .xlsx file.!"

# Binary detector: the code below relies on it emitting a 'sarcastic' label.
pipe = pipeline(model="seek007/taskA-DeBERTa-large-1.0.0",
                tokenizer='seek007/taskA-DeBERTa-large-1.0.0')


def _has_text(text):
    """Return True when *text* holds something other than whitespace.

    The text box may deliver ``None`` or an empty string when left blank,
    so both are treated as "no text entered".
    """
    return bool(text and text.strip())


def _load_tweets(fil):
    """Load the first column of an uploaded CSV/Excel file as a 'tweet' column.

    Args:
        fil: The uploaded file, either an object exposing the path as
            ``.name`` or a plain path string.

    Returns:
        A DataFrame with a single string column ``tweet`` and a fresh
        0..n-1 index (blank rows removed).

    Raises:
        ValueError: If the extension is not .csv/.xlsx/.xls or the file
            contains no tweets.
    """
    path = getattr(fil, 'name', fil)
    lowered = str(path).lower()
    if lowered.endswith('.csv'):
        df = pd.read_csv(path, header=None, names=['tweet'], usecols=[0])
    elif lowered.endswith(('.xlsx', '.xls')):
        df = pd.read_excel(path, header=None, names=['tweet'], usecols=[0])
    else:
        raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

    # Blank cells are read as NaN and numeric cells as numbers; the
    # normalizer is only ever given strings here. Resetting the index keeps
    # the rows aligned with the positional prediction results.
    df = df.dropna(subset=['tweet']).reset_index(drop=True)
    if df.empty:
        raise ValueError("The uploaded file does not contain any tweets.")
    df['tweet'] = df['tweet'].astype(str)
    return df


def predict(text=None, fil=None):
    """Detect sarcasm in a single text and/or in every tweet of a file.

    Args:
        text: Free text typed by the user; ``None``/blank means "not given".
        fil: Uploaded CSV/Excel file whose first column holds tweets, or
            ``None``.

    Returns:
        A ``(sentiment, df, fig)`` tuple:
        * sentiment - "Sarcastic"/"Non Sarcastic" for the typed text, or the
          raw label of the first file row when only a file was given, or a
          help message when nothing was supplied.
        * df - per-tweet labels for the file, or a one-row frame for the
          typed text; ``None`` when nothing was supplied.
        * fig - pie chart of the file's label distribution, else ``None``.

    Raises:
        ValueError: If the uploaded file is of an unsupported type or empty.
    """
    sentiment = None
    df = None
    fig = None

    has_text = _has_text(text)
    if not has_text and not fil:
        return NO_INPUT_MESSAGE, df, fig

    if fil:
        df = _load_tweets(fil)
        normalized = [normalizeTweet(tweet) for tweet in df['tweet']]
        df['label'] = [result['label'] for result in pipe(normalized)]

        sarcastic_count = int(np.sum(df['label'] == 'sarcastic'))
        non_sarcastic_count = int(np.sum(df['label'] == 'non_sarcastic'))

        sns.set_style("whitegrid")
        # Draw through the Axes object rather than pyplot's "current figure",
        # which is global state shared by every request.
        fig, ax = plt.subplots()
        ax.pie(
            [sarcastic_count, non_sarcastic_count],
            explode=(0.1, 0),  # pull the 'Sarcastic' slice out slightly
            labels=['Sarcastic', 'Non-Sarcastic'],
            autopct='%1.1f%%',
            shadow=True,
            startangle=140,
        )
        ax.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
        ax.set_title('Sarcastic vs Non-Sarcastic Tweets')

    if has_text:
        # Only normalise the text once we know it is a real string.
        prediction = pipe([normalizeTweet(text)])[0]
        sentiment = "Sarcastic" if prediction['label'] == 'sarcastic' else "Non Sarcastic"
        if df is None:
            df = pd.DataFrame([{'tweet': text, 'label': sentiment}])
    else:
        # File-only request: surface the first row's label.
        sentiment = df['label'][0]

    return sentiment, df, fig


file_path = gr.File(label="Upload a File")
output = gr.Label(num_top_classes=2, label="Predicted Labels")

detector = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        output,
        gr.DataFrame(headers=['Tweets', 'Labels'], wrap=True),
        gr.Plot(label="Sarcasm Predictor"),
    ],
    title="Sarcasm Predictor",
)

# load classifier pipeline
pipe2 = pipeline(model="seek007/taskB-bertweet-base-trainer-1.0.0",
                 tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0")


# classifier
def classifyB(text=None, fil=None):
    """Classify the kind of sarcasm in a single text and/or a file of tweets.

    Args:
        text: Free text typed by the user; ``None``/blank means "not given".
        fil: Uploaded CSV/Excel file whose first column holds tweets, or
            ``None``.

    Returns:
        A ``(sentiment, df, fig)`` tuple:
        * sentiment - predicted label for the typed text, or the label of the
          first file row when only a file was given, or a help message when
          nothing was supplied.
        * df - tweets with their ``label`` and ``score``; ``None`` when
          nothing was supplied.
        * fig - count plot of the file's labels, else ``None``.

    Raises:
        ValueError: If the uploaded file is of an unsupported type or empty.
    """
    sentiment = None
    df = None
    fig = None

    has_text = _has_text(text)
    if not has_text and not fil:
        return NO_INPUT_MESSAGE, df, fig

    if fil:
        df = _load_tweets(fil)
        results = pipe2([normalizeTweet(tweet) for tweet in df['tweet']])

        # Keep only the two fields shown in the results table.
        scored = pd.DataFrame(
            [{"label": item['label'], "score": item['score']} for item in results]
        )
        df = pd.concat([df, scored], axis=1)

        # Explicit Axes instead of pyplot's global "current figure".
        fig, ax = plt.subplots()
        sns.countplot(x='label', data=df, palette='viridis', ax=ax)
        ax.set_title('Result: Count Plot')
        ax.set_xlabel('label')
        ax.set_ylabel('Count')

    if has_text:
        prediction = pipe2([normalizeTweet(text)])[0]
        sentiment = prediction['label']
        if df is None:
            df = pd.DataFrame(
                [{'tweet': text, 'label': sentiment, "score": prediction['score']}]
            )
    else:
        # File-only request: surface the first row's label.
        sentiment = df['label'][0]

    return sentiment, df, fig


# A component instance is created per interface, hence the re-assignment.
file_path = gr.File(label="Upload a File")
label = gr.Label(label="Labels")

classifier = gr.Interface(
    classifyB,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        label,
        gr.DataFrame(headers=['Tweets', 'Label', "Score"], wrap=True),
        gr.Plot(label="Sarcasm classifier"),
    ],
    title="Sarcasm Classifier",
)

main = gr.TabbedInterface(
    [detector, classifier],
    ['Analysizer', 'Classifier'],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
)

main.launch(share=True)