File size: 5,392 Bytes
8432f36
 
 
5c77412
 
8432f36
 
5c77412
 
8432f36
5c77412
 
8432f36
 
 
 
 
5c77412
 
 
 
8432f36
 
 
 
 
 
5c77412
8432f36
 
 
 
5c77412
 
 
 
 
 
 
8432f36
 
5c77412
8432f36
5c77412
8432f36
 
 
6d8b322
5c77412
8432f36
6d8b322
8432f36
 
 
 
6d8b322
5c77412
8432f36
 
 
 
 
 
 
 
 
 
 
 
5c77412
 
6d8b322
5c77412
8432f36
 
6d8b322
5c77412
8432f36
 
5c77412
8432f36
 
 
 
 
 
 
 
 
5c77412
8432f36
 
 
 
5c77412
8432f36
 
5c77412
 
8432f36
5c77412
 
 
 
 
 
 
 
8432f36
 
5c77412
6d8b322
8432f36
6d8b322
8432f36
 
5c77412
8432f36
5c77412
8432f36
 
 
5c77412
8432f36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c77412
6d8b322
 
8432f36
5c77412
8432f36
6d8b322
8432f36
5c77412
8432f36
5c77412
 
 
6d8b322
8432f36
 
 
 
 
5c77412
8432f36
5c77412
8432f36
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# -*- coding: utf-8 -*-
"""

Developed by Abdul S.
FA20-BCS-OO1 final app.ipynb


Automatically generated by Colab
"""

import pandas as pd
import numpy as np
import gradio as gr
from TweetNormalizer import normalizeTweet
import seaborn as sns
import matplotlib.pyplot as plt
from transformers import pipeline

# Render floats with two decimal places wherever pandas formats them
pd.options.display.float_format = '{:.2f}'.format

# Hugging Face pipeline used by predict(); the model and its tokenizer are
# loaded from the same checkpoint name.
pipe = pipeline(
    model="seek007/taskA-DeBERTa-large-1.0.0",
    tokenizer='seek007/taskA-DeBERTa-large-1.0.0',
)

# pipe = joblib.load('/content/drive/MyDrive/FYPpkl models/pipeA-wTok-0.0.1.pkl')



# Sarcasm detector: classifies typed text and/or the tweets of an uploaded file
def predict(text=None, fil=None):
    """Detect sarcasm in a typed text and/or in every tweet of an uploaded file.

    Args:
        text: Tweet text entered by the user. ``None``, an empty string and a
            whitespace-only string are all treated as "no text".
        fil: Uploaded file, given either as an object exposing its path as
            ``.name`` or as a plain path string. The first column of a
            ``.csv``, ``.xlsx`` or ``.xls`` file (read without a header row)
            is used as the list of tweets.

    Returns:
        A ``(sentiment, df, fig)`` tuple.

        * ``sentiment`` -- ``"Sarcastic"`` or ``"Non Sarcastic"`` for the typed
          text; when only a file is supplied, the label of its first tweet;
          or a message string when there is nothing to classify.
        * ``df`` -- DataFrame with ``tweet`` and ``label`` columns (the file's
          rows, or a single row for the typed text), or ``None``.
        * ``fig`` -- Matplotlib pie chart of the file's label split, or
          ``None`` when no file was processed.

    Raises:
        ValueError: If the uploaded file is neither CSV nor Excel.
    """
    sentiment = None
    df = None
    fig = None

    # Gradio hands over "" (not None) for an empty textbox, so test for
    # "no usable text" rather than comparing against None.
    has_text = bool(text and text.strip())
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", df, fig

    def display_label(raw_label):
        # Same wording for the file branch and the single-text branch.
        return "Sarcastic" if raw_label == 'sarcastic' else "Non Sarcastic"

    if fil:
        path = str(getattr(fil, "name", fil))
        lowered = path.lower()  # accept upper-case extensions such as ".CSV"
        if lowered.endswith('.csv'):
            df = pd.read_csv(path, header=None, names=['tweet'], usecols=[0])
        elif lowered.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path, header=None, names=['tweet'], usecols=[0])
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Blank cells are read as NaN and cannot be normalised; drop them and
        # renumber so positional access below stays valid.
        df = df.dropna(subset=['tweet']).reset_index(drop=True)

        if df.empty:
            # Nothing to classify or plot from this file.
            df = None
        else:
            # str() guards against numeric cells, which have no string methods.
            normalized = [normalizeTweet(str(tweet)) for tweet in df['tweet']]
            df['label'] = [item['label'] for item in pipe.predict(normalized)]

            sarcastic_count = int((df['label'] == 'sarcastic').sum())
            # Everything that is not 'sarcastic' counts as non-sarcastic,
            # mirroring display_label(); this also keeps the pie from being
            # drawn with all-zero sizes.
            non_sarcastic_count = len(df) - sarcastic_count

            sns.set_style("whitegrid")
            # NOTE(review): figures created through pyplot stay registered
            # until closed; consider closing them after Gradio has rendered
            # the plot -- confirm this does not interfere with gr.Plot.
            fig, ax = plt.subplots()
            ax.pie(
                [sarcastic_count, non_sarcastic_count],
                explode=(0.1, 0),  # pull the first slice out slightly
                labels=['Sarcastic', 'Non-Sarcastic'],
                autopct='%1.1f%%',
                shadow=True,
                startangle=140,
            )
            ax.axis('equal')  # equal aspect ratio so the pie is a circle
            ax.set_title('Sarcastic vs Non-Sarcastic Tweets')

            if not has_text:
                sentiment = display_label(df['label'].iloc[0])

    if has_text:
        prediction = pipe.predict([normalizeTweet(text)])[0]
        sentiment = display_label(prediction['label'])
        if df is None:
            df = pd.DataFrame([{'tweet': text, 'label': sentiment}])

    if sentiment is None:
        # Only reachable when a file was uploaded but held no usable rows.
        return "The uploaded file does not contain any tweets.", df, fig

    return sentiment, df, fig





# Detector tab: a text box and a file upload feed predict(), whose three
# return values fill the label, the table and the plot.
file_path = gr.File(label="Upload a File")
output = gr.Label(num_top_classes=2, label="Predicted Labels")
detector = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        output,
        gr.DataFrame(headers=['Tweets', 'Labels'], wrap=True),
        gr.Plot(label="Sarcasm Predictor"),
    ],
    title="Sarcasm Predictor",
)

# demo.launch(debug=True)


# Hugging Face pipeline used by classifyB(); the model and its tokenizer are
# loaded from the same checkpoint name.
pipe2 = pipeline(
    model="seek007/taskB-bertweet-base-trainer-1.0.0",
    tokenizer="seek007/taskB-bertweet-base-trainer-1.0.0",
)


# classifier
def classifyB(text=None, fil=None):
    """Classify a typed text and/or every tweet of an uploaded file with ``pipe2``.

    Args:
        text: Tweet text entered by the user. ``None``, an empty string and a
            whitespace-only string are all treated as "no text".
        fil: Uploaded file, given either as an object exposing its path as
            ``.name`` or as a plain path string. The first column of a
            ``.csv``, ``.xlsx`` or ``.xls`` file (read without a header row)
            is used as the list of tweets.

    Returns:
        A ``(sentiment, df, fig)`` tuple.

        * ``sentiment`` -- the label predicted for the typed text; when only a
          file is supplied, the label of its first tweet; or a message string
          when there is nothing to classify.
        * ``df`` -- DataFrame with ``tweet``, ``label`` and ``score`` columns
          (the file's rows, or a single row for the typed text), or ``None``.
        * ``fig`` -- Matplotlib count plot of the file's labels, or ``None``
          when no file was processed.

    Raises:
        ValueError: If the uploaded file is neither CSV nor Excel.
    """
    sentiment = None
    df = None
    fig = None

    # Gradio hands over "" (not None) for an empty textbox, so test for
    # "no usable text" rather than comparing against None.
    has_text = bool(text and text.strip())
    if not has_text and not fil:
        return "Either enter text or upload .csv or .xlsx file.!", df, fig

    if fil:
        path = str(getattr(fil, "name", fil))
        lowered = path.lower()  # accept upper-case extensions such as ".CSV"
        if lowered.endswith('.csv'):
            df = pd.read_csv(path, header=None, names=['tweet'], usecols=[0])
        elif lowered.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path, header=None, names=['tweet'], usecols=[0])
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        # Blank cells are read as NaN and cannot be normalised; drop them and
        # renumber so the predictions line up with the remaining rows.
        df = df.dropna(subset=['tweet']).reset_index(drop=True)

        if df.empty:
            # Nothing to classify or plot from this file.
            df = None
        else:
            # str() guards against numeric cells, which have no string methods.
            normalized = [normalizeTweet(str(tweet)) for tweet in df['tweet']]
            results = pipe2(normalized)
            df['label'] = [item['label'] for item in results]
            df['score'] = [item['score'] for item in results]

            # Draw on an explicit axes instead of pyplot's implicit current one.
            # NOTE(review): figures created through pyplot stay registered
            # until closed; consider closing them after Gradio has rendered
            # the plot -- confirm this does not interfere with gr.Plot.
            fig, ax = plt.subplots()
            sns.countplot(x='label', data=df, palette='viridis', ax=ax)
            ax.set_title('Result: Count Plot')
            ax.set_xlabel('label')
            ax.set_ylabel('Count')

            if not has_text:
                sentiment = df['label'].iloc[0]

    if has_text:
        prediction = pipe2([normalizeTweet(text)])[0]
        sentiment = prediction['label']
        if df is None:
            df = pd.DataFrame(
                [{'tweet': text, 'label': sentiment, "score": prediction['score']}]
            )

    if sentiment is None:
        # Only reachable when a file was uploaded but held no usable rows.
        return "The uploaded file does not contain any tweets.", df, fig

    return sentiment, df, fig



# Classifier tab: a text box and a file upload feed classifyB(), whose three
# return values fill the label, the table and the plot.
file_path = gr.File(label="Upload a File")
label = gr.Label(label="Labels")
classifier = gr.Interface(
    classifyB,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        label,
        gr.DataFrame(headers=['Tweets', 'Label', "Score"], wrap=True),
        gr.Plot(label="Sarcasm classifier"),
    ],
    title="Sarcasm Classifier",
)

# Combine both interfaces into one app, one tab each (tab name typo fixed:
# "Analysizer" -> "Analyzer").
main = gr.TabbedInterface(
    [detector, classifier],
    ['Analyzer', 'Classifier'],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
)

main.launch(share=True)