Spaces:

seek007
/

external

Sleeping

File size: 5,392 Bytes

# -*- coding: utf-8 -*-
"""

Developed by Abdul S.
FA20-BCS-OO1 final app.ipynb


Automatically generated by Colab
"""

import pandas as pd
import numpy as np
import gradio as gr
from TweetNormalizer import normalizeTweet
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import pipeline

# Format floats with two decimal places whenever pandas renders them.
pd.set_option('display.float_format', '{:.2f}'.format)

# Sarcasm-detection pipeline used by predict(); the same Hub repository
# supplies both the model weights and the tokenizer.
pipe = pipeline(
    model="seek007/taskA-DeBERTa-large-1.0.0",
    tokenizer='seek007/taskA-DeBERTa-large-1.0.0',
)

# pipe = joblib.load('/content/drive/MyDrive/FYPpkl models/pipeA-wTok-0.0.1.pkl')



# 
def predict(text=None, fil=None):
    """Detect sarcasm in a typed tweet and/or in every tweet of an uploaded file.

    Args:
        text: Tweet typed by the user. ``None``, an empty string and a
            whitespace-only string are all treated as "no text".
            NOTE(review): the text box wired to this function presumably
            submits ``""`` rather than ``None`` when left blank -- confirm
            against the Gradio version in use.
        fil: Uploaded file, or ``None``. Either an object exposing the path
            as ``.name`` or a plain path string. ``.csv``, ``.xlsx`` and
            ``.xls`` are accepted; only the first column is read and every
            row (including the first) is treated as a tweet.

    Returns:
        A ``(sentiment, df, fig)`` tuple.

        * ``sentiment`` -- ``"Sarcastic"`` / ``"Non Sarcastic"`` for typed
          text; for a file-only request, the raw model label of the first
          tweet; a short message when there is nothing to analyse.
        * ``df`` -- the uploaded tweets with a ``label`` column, or a
          one-row frame for a text-only request, or ``None``.
        * ``fig`` -- pie chart of sarcastic vs. non-sarcastic counts for a
          file, otherwise ``None``.

        When both inputs are supplied, ``sentiment`` describes the typed
        text while ``df`` and ``fig`` describe the file.

    Raises:
        ValueError: If the uploaded file has an unsupported extension.
    """
    has_text = isinstance(text, str) and bool(text.strip())

    # Bail out before touching the model when there is nothing to analyse.
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept both a file wrapper with ``.name`` and a bare path string.
        path = getattr(fil, 'name', fil)
        if path.endswith('.csv'):
            df = pd.read_csv(path, header=None, names=['tweet'], usecols=[0])
        elif path.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path, header=None, names=['tweet'], usecols=[0])
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Blank cells load as NaN, which is not text; drop them so the
        # normaliser only ever receives real tweets.
        df = df.dropna(subset=['tweet']).reset_index(drop=True)

        if df.empty:
            if not has_text:
                sentiment = "The uploaded file contains no tweets."
        else:
            # str() guards against numeric cells coming out of the reader.
            normalized = [normalizeTweet(str(tweet)) for tweet in df['tweet']]
            results = pipe.predict(normalized)
            df['label'] = [result['label'] for result in results]

            sarcastic_count = int((df['label'] == 'sarcastic').sum())
            non_sarcastic_count = int((df['label'] == 'non_sarcastic').sum())

            sns.set_style("whitegrid")
            # NOTE(review): figures created through pyplot stay registered
            # until closed; consider closing them once rendered.
            fig, ax = plt.subplots()
            ax.pie(
                [sarcastic_count, non_sarcastic_count],
                explode=(0.1, 0),  # pull the first ("Sarcastic") slice out
                labels=['Sarcastic', 'Non-Sarcastic'],
                autopct='%1.1f%%',
                shadow=True,
                startangle=140,
            )
            ax.axis('equal')  # equal aspect ratio so the pie is a circle
            ax.set_title('Sarcastic vs Non-Sarcastic Tweets')

            if not has_text:
                sentiment = df['label'].iloc[0]

    if has_text:
        prediction = pipe.predict([normalizeTweet(text)])[0]
        sentiment = "Sarcastic" if prediction['label'] == 'sarcastic' else "Non Sarcastic"
        if df is None:
            # Text-only request: report the single tweet in tabular form.
            df = pd.DataFrame([{'tweet': text, 'label': sentiment}])

    return sentiment, df, fig





# Components and interface for the sarcasm-detection tab.
file_path = gr.File(label="Upload a File")
output = gr.Label(num_top_classes=2, label="Predicted Labels")
detector = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        output,
        gr.DataFrame(headers=['Tweets', 'Labels'], wrap=True),
        gr.Plot(label="Sarcasm Predictor"),
    ],
    title="Sarcasm Predictor",
)

# demo.launch(debug=True)


# Pipeline used by classifyB(); model and tokenizer come from the same
# Hub repository.
pipe2 = pipeline(
    model="seek007/taskB-bertweet-base-trainer-1.0.0",
    tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0",
)


# classifier
def classifyB(text=None, fil=None):
    """Classify a typed tweet and/or every tweet of an uploaded file with ``pipe2``.

    Args:
        text: Tweet typed by the user. ``None``, an empty string and a
            whitespace-only string are all treated as "no text".
            NOTE(review): the text box wired to this function presumably
            submits ``""`` rather than ``None`` when left blank -- confirm
            against the Gradio version in use.
        fil: Uploaded file, or ``None``. Either an object exposing the path
            as ``.name`` or a plain path string. ``.csv``, ``.xlsx`` and
            ``.xls`` are accepted; only the first column is read and every
            row (including the first) is treated as a tweet.

    Returns:
        A ``(sentiment, df, fig)`` tuple.

        * ``sentiment`` -- the label predicted for the typed text; for a
          file-only request, the label of the first tweet; a short message
          when there is nothing to analyse.
        * ``df`` -- the uploaded tweets with ``label`` and ``score``
          columns, or a one-row frame for a text-only request, or ``None``.
        * ``fig`` -- count plot of predicted labels for a file, otherwise
          ``None``.

        When both inputs are supplied, ``sentiment`` describes the typed
        text while ``df`` and ``fig`` describe the file.

    Raises:
        ValueError: If the uploaded file has an unsupported extension.
    """
    has_text = isinstance(text, str) and bool(text.strip())

    # Bail out before touching the model when there is nothing to analyse.
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept both a file wrapper with ``.name`` and a bare path string.
        path = getattr(fil, 'name', fil)
        if path.endswith('.csv'):
            df = pd.read_csv(path, header=None, names=['tweet'], usecols=[0])
        elif path.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path, header=None, names=['tweet'], usecols=[0])
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Blank cells load as NaN, which is not text; drop them so the
        # normaliser only ever receives real tweets.
        df = df.dropna(subset=['tweet']).reset_index(drop=True)

        if df.empty:
            if not has_text:
                sentiment = "The uploaded file contains no tweets."
        else:
            # str() guards against numeric cells coming out of the reader.
            normalized = [normalizeTweet(str(tweet)) for tweet in df['tweet']]
            results = pipe2(normalized)
            df['label'] = [result['label'] for result in results]
            df['score'] = [result['score'] for result in results]

            # Draw on an explicit Axes instead of pyplot's implicit
            # "current" one, which is process-global state.
            fig, ax = plt.subplots()
            sns.countplot(x='label', data=df, palette='viridis', ax=ax)
            ax.set_title('Result: Count Plot')
            ax.set_xlabel('label')
            ax.set_ylabel('Count')

            if not has_text:
                sentiment = df['label'].iloc[0]

    if has_text:
        prediction = pipe2([normalizeTweet(text)])[0]
        sentiment = prediction['label']
        if df is None:
            # Text-only request: report the single tweet in tabular form.
            df = pd.DataFrame(
                [{'tweet': text, 'label': sentiment, "score": prediction['score']}]
            )

    return sentiment, df, fig



# Components and interface for the sarcasm-classification tab.
file_path = gr.File(label="Upload a File")
label = gr.Label(label="Labels")
classifier = gr.Interface(
    classifyB,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        label,
        gr.DataFrame(headers=['Tweets', 'Label', "Score"], wrap=True),
        gr.Plot(label="Sarcasm classifier"),
    ],
    title="Sarcasm Classifier",
)  # ,theme= 'darkhuggingface'

# Combine both interfaces into one tabbed app; tab names are paired
# positionally with the interfaces. ('Analyzer' was misspelled 'Analysizer'.)
main = gr.TabbedInterface(
    [detector, classifier],
    ['Analyzer', 'Classifier'],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
)

main.launch(share=True)