# Spaces: seek007/external (Sleeping)
# File size: 6,221 Bytes
# 8432f36

# -*- coding: utf-8 -*-
"""FA20-BCS-OO1 final app.ipynb

Automatically generated by Colab
"""

# !pip install emoji gradio



import joblib, pickle, pandas as pd, numpy as np
import gradio as gr
from TweetNormalizer import normalizeTweet
import seaborn as sns
import matplotlib.pyplot as plt

from transformers import pipeline
# Candidate checkpoints for the sarcastic / non-sarcastic model used by
# predict():
#   seek007/taskA-DeBERTa-bweet-1.2.5
#   seek007/taskA-DeBERTa-large-1.0.0   <- the one loaded below
#   seek007/taskA-DeBERTa-bweet-1.1.0
# Disabled alternative: load a pickled pipeline with joblib from
# '/content/drive/MyDrive/FYPpkl models/pipeA-wTok-0.0.1.pkl'.
_task_a_checkpoint = "seek007/taskA-DeBERTa-large-1.0.0"
pipe = pipeline(model=_task_a_checkpoint, tokenizer=_task_a_checkpoint)



import numpy as np

def predict(text=None, fil=None):
    """Label a single tweet and/or every tweet in an uploaded file.

    Args:
        text: Tweet typed into the text box. ``None``, empty and
            whitespace-only values are treated as "no text given".
        fil: Uploaded CSV/Excel file that has a ``tweet`` column. May be an
            object exposing the path as ``.name`` or a plain path string.

    Returns:
        A ``(sentiment, df, fig)`` tuple:

        * ``sentiment`` -- ``"Sarcastic"`` / ``"Non Sarcastic"`` for ``text``,
          ``None`` when only a file was supplied, or a help message when
          neither input was supplied.
        * ``df`` -- the uploaded table with a ``label`` column added (and the
          ``sarcastic`` column removed if present), a one-row table for a
          text-only request, or ``None``.
        * ``fig`` -- pie chart of the per-file label counts, or ``None``.

    Raises:
        ValueError: If the file is not CSV/Excel or has no ``tweet`` column.
    """
    has_text = isinstance(text, str) and bool(text.strip())
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    # The model may emit either generic ids or readable names; normalise the
    # generic ids so the counts below work in both cases.
    label_names = {
        'LABEL_0': 'non_sarcastic',
        'LABEL_1': 'sarcastic',
    }

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept both a file wrapper (path in ``.name``) and a bare path.
        path = str(getattr(fil, "name", fil))
        lowered = path.lower()
        if lowered.endswith('.csv'):
            df = pd.read_csv(path)
        elif lowered.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")
        if 'tweet' not in df.columns:
            raise ValueError("The uploaded file must contain a 'tweet' column.")

        # Missing cells become empty strings so normalizeTweet always gets str.
        tweets = [normalizeTweet(t) for t in df['tweet'].fillna("").astype(str)]
        results = pipe.predict(tweets) if tweets else []
        df['label'] = [label_names.get(r['label'], r['label']) for r in results]
        # Presumably the ground-truth column of the dataset; it is hidden from
        # the output, and files without it are accepted as well.
        df = df.drop(columns='sarcastic', errors='ignore')

        sarcastic_count = int((df['label'] == 'sarcastic').sum())
        non_sarcastic_count = int((df['label'] == 'non_sarcastic').sum())

        # A pie chart of all zeros cannot be normalised, so skip it.
        if sarcastic_count + non_sarcastic_count > 0:
            sns.set_style("whitegrid")
            fig, ax = plt.subplots()
            ax.pie(
                [sarcastic_count, non_sarcastic_count],
                explode=(0.1, 0),  # pull the first slice out
                labels=['Sarcastic', 'Non-Sarcastic'],
                autopct='%1.1f%%',
                shadow=True,
                startangle=140,
            )
            ax.axis('equal')  # equal aspect ratio keeps the pie circular
            ax.set_title('Sarcastic vs Non-Sarcastic Tweets')
            # Unregister from pyplot so figures do not pile up across
            # requests; the Figure object itself stays usable.
            plt.close(fig)

    if has_text:
        prediction = pipe.predict([normalizeTweet(text)])[0]
        print(prediction)
        is_sarcastic = prediction['label'] in ('LABEL_1', 'sarcastic')
        sentiment = "Sarcastic" if is_sarcastic else "Non Sarcastic"
        if df is None:
            df = pd.DataFrame([{'tweet': text, 'label': sentiment}])

    return sentiment, df, fig





# First tab: sarcastic / non-sarcastic prediction served by predict().
file_path = gr.File(label="Upload a File")
output = gr.Label(num_top_classes=2, label="Predicted Labels")
demo = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        output,
        gr.DataFrame(headers=['Tweets', 'Labels'], wrap=True),
        gr.Plot(label="Sarcasm Predictor"),
    ],
    title="Sarcasm Predictor",
)
# To try this tab on its own: demo.launch(debug=True)

# NOTE: a second interface named `classification` used to be built here from
# a function called `classify`. That function is not defined or imported in
# this file, so the module failed with NameError while loading, and the
# interface was never added to the tabbed app. It has been removed.

from transformers import pipeline
# Second model, used by classifyB(); model and tokenizer come from the same
# checkpoint.
_task_b_checkpoint = "seek007/taskB-bertweet-base-trainer-1.0.0"
pipe2 = pipeline(model=_task_b_checkpoint, tokenizer=_task_b_checkpoint)

def classifyB(text=None, fil=None):
    """Classify a single tweet and/or every tweet in an uploaded file.

    Args:
        text: Tweet typed into the text box. ``None``, empty and
            whitespace-only values are treated as "no text given".
        fil: Uploaded CSV/Excel file that has a ``tweet`` column. May be an
            object exposing the path as ``.name`` or a plain path string.

    Returns:
        A ``(sentiment, df, fig)`` tuple:

        * ``sentiment`` -- the label predicted for ``text``, ``None`` when
          only a file was supplied, or a help message when neither input was
          supplied.
        * ``df`` -- the uploaded table with ``label`` and ``score`` columns
          holding the per-tweet predictions, or ``None`` without a file.
        * ``fig`` -- count plot of the per-file labels, or ``None``.

    Raises:
        ValueError: If the file is not CSV/Excel or has no ``tweet`` column.
    """
    has_text = isinstance(text, str) and bool(text.strip())
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if fil:
        # Accept both a file wrapper (path in ``.name``) and a bare path.
        path = str(getattr(fil, "name", fil))
        lowered = path.lower()
        if lowered.endswith('.csv'):
            df = pd.read_csv(path)
        elif lowered.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")
        if 'tweet' not in df.columns:
            raise ValueError("The uploaded file must contain a 'tweet' column.")

        # Missing cells become empty strings so normalizeTweet always gets str.
        tweets = [normalizeTweet(t) for t in df['tweet'].fillna("").astype(str)]
        results = pipe2(tweets) if tweets else []

        # Assign columns directly instead of concatenating a second frame:
        # concatenation would produce duplicate 'label'/'score' columns when
        # the upload already has them, which breaks the plot below.
        df['label'] = [r['label'] for r in results]
        df['score'] = [r['score'] for r in results]

        if results:
            fig, ax = plt.subplots()
            sns.countplot(x='label', data=df, palette='viridis', ax=ax)
            ax.set_title('Result: Count Plot')
            ax.set_xlabel('label')
            ax.set_ylabel('Count')
            # Unregister from pyplot so figures do not pile up across
            # requests; the Figure object itself stays usable.
            plt.close(fig)

    if has_text:
        prediction = pipe2([normalizeTweet(text)])[0]
        print(prediction["label"])
        sentiment = prediction['label']

    return sentiment, df, fig

# Second tab: the classifier served by classifyB().
file_path = gr.File(label="Upload a File")
label = gr.Label(label="Labels")
classificationB = gr.Interface(
    classifyB,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        label,
        gr.DataFrame(headers=['Tweets', 'Label', "Score"], wrap=True),
        gr.Plot(label="Sarcasm classifier"),
    ],
    title="Sarcasm Classifier",
    theme='dark',
)

# Combine both interfaces into one tabbed app and start serving it.
main = gr.TabbedInterface(
    [demo, classificationB],
    ['Analysizer', 'Classifier'],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
)

main.launch(share=True)