# -*- coding: utf-8 -*-
"""FA20-BCS-OO1 final app.ipynb

Automatically generated by Colab

Gradio front-end with two tabs:

* ``predict``   - binary sarcastic / non-sarcastic detection (task A model).
* ``classifyB`` - sarcasm category classification (task B model).

Both callbacks accept free text and/or an uploaded CSV/Excel file that has a
``tweet`` column.
"""

# !pip install emoji gradio
import pickle

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from transformers import pipeline

from TweetNormalizer import normalizeTweet

# Other task A checkpoints listed by the author:
#   seek007/taskA-DeBERTa-bweet-1.2.5
#   seek007/taskA-DeBERTa-bweet-1.1.0
TASK_A_MODEL = "seek007/taskA-DeBERTa-large-1.0.0"
TASK_B_MODEL = "seek007/taskB-bertweet-base-trainer-1.0.0"

# Message shown in the label output when the user supplied no input at all.
NO_INPUT_MESSAGE = "Either enter text or upload .csv or .xlsx file.!"

# The task A model may emit generic ``LABEL_n`` ids or already-named labels;
# this table normalises the generic ids and leaves named labels untouched.
TASK_A_LABELS = {"LABEL_0": "non_sarcastic", "LABEL_1": "sarcastic"}

pipe = pipeline(model=TASK_A_MODEL, tokenizer=TASK_A_MODEL)


def _has_text(text):
    """Return True when *text* is a non-blank string."""
    return bool(text and text.strip())


def _load_tweets(fil):
    """Load an uploaded CSV/Excel file.

    Args:
        fil: Upload handed over by ``gr.File``; either an object exposing a
            ``name`` attribute (temp-file wrapper) or a plain path string.

    Returns:
        ``(frame, tweets)`` where ``frame`` is the parsed DataFrame and
        ``tweets`` is the list of normalised tweet strings, in row order.

    Raises:
        ValueError: If the extension is not .csv/.xlsx/.xls or the file has
            no ``tweet`` column.
    """
    path = str(getattr(fil, "name", fil))
    lowered = path.lower()
    if lowered.endswith(".csv"):
        frame = pd.read_csv(path)
    elif lowered.endswith((".xlsx", ".xls")):
        frame = pd.read_excel(path)
    else:
        raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

    if "tweet" not in frame.columns:
        raise ValueError("The uploaded file must contain a 'tweet' column.")

    # Blank cells are read as NaN (a float); coerce so the normaliser always
    # receives a string.
    raw_tweets = frame["tweet"].fillna("").astype(str)
    return frame, [normalizeTweet(tweet) for tweet in raw_tweets]


def _sarcasm_pie(sarcastic_count, non_sarcastic_count):
    """Return a pie-chart figure of sarcastic vs non-sarcastic counts."""
    sns.set_style("whitegrid")
    fig, ax = plt.subplots()
    ax.pie(
        [sarcastic_count, non_sarcastic_count],
        explode=(0.1, 0),  # pull the first (sarcastic) slice out
        labels=["Sarcastic", "Non-Sarcastic"],
        autopct="%1.1f%%",
        shadow=True,
        startangle=140,
    )
    ax.axis("equal")  # equal aspect ratio so the pie is drawn as a circle
    ax.set_title("Sarcastic vs Non-Sarcastic Tweets")
    return fig


def predict(text=None, fil=None):
    """Detect sarcasm in a single text and/or in every tweet of a file.

    Args:
        text: Free text from the text box; may be empty or None.
        fil: Optional uploaded CSV/Excel file with a ``tweet`` column.

    Returns:
        ``(sentiment, df, fig)``:
        * ``sentiment`` - "Sarcastic" / "Non Sarcastic" for ``text``, None
          when only a file was given, or a help message when nothing was
          given.
        * ``df`` - the uploaded table with a ``label`` column added (and any
          ``sarcastic`` column removed), or a one-row table for text-only
          input.
        * ``fig`` - pie chart of the file's label distribution, else None.

    Raises:
        ValueError: Propagated from file loading for unsupported uploads.
    """
    sentiment = None
    df = None
    fig = None
    has_text = _has_text(text)

    if not has_text and not fil:
        return NO_INPUT_MESSAGE, df, fig

    if fil:
        df, tweets = _load_tweets(fil)
        results = pipe(tweets)
        df["label"] = [
            TASK_A_LABELS.get(result["label"], result["label"])
            for result in results
        ]
        # Drop the ground-truth column if the upload has one; its absence is
        # not an error.
        df = df.drop(columns="sarcastic", errors="ignore")

        sarcastic_count = int((df["label"] == "sarcastic").sum())
        non_sarcastic_count = int((df["label"] == "non_sarcastic").sum())
        # A pie chart of all-zero sizes is undefined, so skip it then.
        if sarcastic_count + non_sarcastic_count > 0:
            fig = _sarcasm_pie(sarcastic_count, non_sarcastic_count)

    if has_text:
        prediction = pipe([normalizeTweet(text)])[0]
        is_sarcastic = prediction["label"] in ("LABEL_1", "sarcastic")
        sentiment = "Sarcastic" if is_sarcastic else "Non Sarcastic"
        if not fil:
            df = pd.DataFrame([{"tweet": text, "label": sentiment}])

    return sentiment, df, fig


file_path = gr.File(label="Upload a File")
output = gr.Label(num_top_classes=2, label="Predicted Labels")
demo = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        output,
        gr.DataFrame(headers=["Tweets", "Labels"], wrap=True),
        gr.Plot(label="Sarcasm Predictor"),
    ],
    title="Sarcasm Predictor",
)

# NOTE: a former ``classification`` interface was bound to a ``classify``
# callback that is not defined in this file (NameError at start-up) and was
# not part of the tabbed UI below, so it has been removed.

pipe2 = pipeline(model=TASK_B_MODEL, tokenizer=TASK_B_MODEL)


def classifyB(text=None, fil=None):
    """Classify the sarcasm category of a text and/or of every tweet in a file.

    The label strings come straight from the task B model; presumably one of
    sarcasm, irony, satire, understatement, overstatement or rhetorical
    question - TODO confirm against the model config.

    Args:
        text: Free text from the text box; may be empty or None.
        fil: Optional uploaded CSV/Excel file with a ``tweet`` column.

    Returns:
        ``(sentiment, df, fig)``:
        * ``sentiment`` - predicted label for ``text``, None when only a file
          was given, or a help message when nothing was given.
        * ``df`` - the uploaded table with ``label`` and ``score`` columns
          added, or a one-row table for text-only input.
        * ``fig`` - count plot of the file's predicted labels, else None.

    Raises:
        ValueError: Propagated from file loading for unsupported uploads.
    """
    sentiment = None
    df = None
    fig = None
    has_text = _has_text(text)

    if not has_text and not fil:
        return NO_INPUT_MESSAGE, df, fig

    if fil:
        df, tweets = _load_tweets(fil)
        results = pipe2(tweets)
        # Assign directly so an existing ``label``/``score`` column in the
        # upload is replaced rather than duplicated.
        df["label"] = [result["label"] for result in results]
        df["score"] = [result["score"] for result in results]

        fig, ax = plt.subplots()
        sns.countplot(x="label", data=df, palette="viridis", ax=ax)
        ax.set_title("Result: Count Plot")
        ax.set_xlabel("label")
        ax.set_ylabel("Count")

    if has_text:
        prediction = pipe2([normalizeTweet(text)])[0]
        sentiment = prediction["label"]
        if not fil:
            df = pd.DataFrame(
                [{"tweet": text, "label": sentiment, "score": prediction["score"]}]
            )

    return sentiment, df, fig


file_path = gr.File(label="Upload a File")
label = gr.Label(label="Labels")
classificationB = gr.Interface(
    classifyB,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        label,
        gr.DataFrame(headers=["Tweets", "Label", "Score"], wrap=True),
        gr.Plot(label="Sarcasm classifier"),
    ],
    title="Sarcasm Classifier",
    theme="dark",
)

main = gr.TabbedInterface(
    [demo, classificationB],
    ["Analysizer", "Classifier"],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
)
main.launch(share=True)