File size: 6,221 Bytes
8432f36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# -*- coding: utf-8 -*-
"""FA20-BCS-OO1 final app.ipynb

Automatically generated by Colab
"""

# !pip install emoji gradio



import joblib, pickle, pandas as pd, numpy as np
import gradio as gr
from TweetNormalizer import normalizeTweet
import seaborn as sns
import matplotlib.pyplot as plt

from transformers import pipeline
# Task-A (sarcastic vs. non-sarcastic) text-classification pipeline.
# Other model ids that were listed here as alternatives:
#   seek007/taskA-DeBERTa-bweet-1.2.5
#   seek007/taskA-DeBERTa-bweet-1.1.0
_TASK_A_MODEL = "seek007/taskA-DeBERTa-large-1.0.0"
pipe = pipeline(
    model=_TASK_A_MODEL,
    tokenizer=_TASK_A_MODEL,
)

# pipe = joblib.load('/content/drive/MyDrive/FYPpkl models/pipeA-wTok-0.0.1.pkl')



import numpy as np

def predict(text=None, fil=None):
    """Run the binary sarcasm pipeline on typed text and/or an uploaded file.

    Args:
        text: Raw tweet text. ``None``, empty or whitespace-only text is
            treated as "no text supplied".
        fil: Uploaded CSV/Excel file, either an object exposing the path as
            ``.name`` or a plain path string. The table must contain a
            ``tweet`` column.

    Returns:
        A ``(sentiment, df, fig)`` tuple:

        * ``sentiment`` -- ``"Sarcastic"`` / ``"Non Sarcastic"`` for ``text``,
          ``None`` when only a file was supplied, or a usage message when
          neither input was supplied.
        * ``df`` -- the uploaded table with a ``label`` column added, a
          one-row frame for the typed text when no file was supplied, or
          ``None``.
        * ``fig`` -- pie chart of the per-label counts for the uploaded file,
          otherwise ``None``.

    Raises:
        ValueError: If the file is neither CSV nor Excel, or has no ``tweet``
            column.
    """
    has_text = bool(text and text.strip())
    has_file = bool(fil)

    # Bail out before touching the model when there is nothing to classify.
    if not (has_text or has_file):
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if has_file:
        # Accept both an upload object (path in ``.name``) and a bare path.
        path = str(getattr(fil, "name", fil))
        lowered = path.lower()
        if lowered.endswith('.csv'):
            df = pd.read_csv(path)
        elif lowered.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        if 'tweet' not in df.columns:
            raise ValueError("The uploaded file must contain a 'tweet' column.")

        # Missing cells become empty strings so normalizeTweet always gets str.
        tweets = [normalizeTweet(t) for t in df['tweet'].fillna("").astype(str)]
        predictions = pipe.predict(tweets) if tweets else []

        # The model may emit generic LABEL_n names or readable ones; map the
        # generic names so the counts below work for either naming scheme.
        label_names = {
            'LABEL_0': 'non_sarcastic',
            'LABEL_1': 'sarcastic'
        }
        df['label'] = [label_names.get(p['label'], p['label']) for p in predictions]

        # Drop the ground-truth column if the upload happens to carry one.
        df.drop('sarcastic', axis=1, inplace=True, errors='ignore')

        sarcastic_count = int((df['label'] == 'sarcastic').sum())
        non_sarcastic_count = int((df['label'] == 'non_sarcastic').sum())

        # A pie chart of all-zero sizes is meaningless, so only draw when
        # at least one row was labelled.
        if sarcastic_count + non_sarcastic_count > 0:
            # Build the figure without pyplot so it is not kept alive in
            # pyplot's global registry after the request finishes.
            from matplotlib.figure import Figure

            sns.set_style("whitegrid")
            fig = Figure()
            ax = fig.subplots()
            ax.pie(
                [sarcastic_count, non_sarcastic_count],
                explode=(0.1, 0),  # pull the first slice out slightly
                labels=['Sarcastic', 'Non-Sarcastic'],
                autopct='%1.1f%%',
                shadow=True,
                startangle=140,
            )
            ax.axis('equal')  # equal aspect ratio keeps the pie circular
            ax.set_title('Sarcastic vs Non-Sarcastic Tweets')

    if has_text:
        prediction = pipe.predict([normalizeTweet(text)])[0]
        print(prediction)
        is_sarcastic = prediction['label'] in ('LABEL_1', 'sarcastic')
        sentiment = "Sarcastic" if is_sarcastic else "Non Sarcastic"
        if df is None:
            df = pd.DataFrame([{'tweet': text, 'label': sentiment}])

    return sentiment, df, fig





# Components and interface for the binary sarcasm-detection tab.
file_path = gr.File(label="Upload a File")
output = gr.Label(num_top_classes=2, label="Predicted Labels")
demo = gr.Interface(
    fn=predict,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        output,
        gr.DataFrame(headers=['Tweets', 'Labels'], wrap=True),
        gr.Plot(label="Sarcasm Predictor"),
    ],
    title="Sarcasm Predictor",
)

# demo.launch(debug=True)

file_path = gr.File(label="Upload a File")
label = gr.Label(num_top_classes=3, label="Top 3 Labels")
# No `classify` function is defined in this file, so referencing it directly
# raised NameError at import time and prevented the app from starting.
# Build this interface only when such a function is actually available;
# otherwise `classification` is None.
_classify_fn = globals().get("classify")
if _classify_fn is not None:
    classification = gr.Interface(
        _classify_fn,
        inputs=[gr.Text(label="Input"), file_path],
        outputs=[
            label,
            gr.DataFrame(headers=['Tweets', 'Label', "Score"], wrap=True),
            gr.Plot(label="Sarcasm classifier"),
        ],
        title="Sarcasm Classifier",
    )
else:
    classification = None

# classification.launch(debug=True)

from transformers import pipeline
# Task-B pipeline; the same model id is used for the weights and the tokenizer.
_TASK_B_MODEL = "seek007/taskB-bertweet-base-trainer-1.0.0"
pipe2 = pipeline(
    model=_TASK_B_MODEL,
    tokenizer=_TASK_B_MODEL,
)

def classifyB(text=None, fil=None):
    """Run the task-B pipeline (``pipe2``) on typed text and/or an uploaded file.

    Args:
        text: Raw tweet text. ``None``, empty or whitespace-only text is
            treated as "no text supplied".
        fil: Uploaded CSV/Excel file, either an object exposing the path as
            ``.name`` or a plain path string. The table must contain a
            ``tweet`` column.

    Returns:
        A ``(sentiment, df, fig)`` tuple:

        * ``sentiment`` -- the label predicted for ``text``, ``None`` when
          only a file was supplied, or a usage message when neither input
          was supplied.
        * ``df`` -- the uploaded table with ``label`` and ``score`` columns
          added, or ``None`` when no file was supplied.
        * ``fig`` -- count plot of the predicted labels for the uploaded
          file, otherwise ``None``.

    Raises:
        ValueError: If the file is neither CSV nor Excel, or has no ``tweet``
            column.
    """
    has_text = bool(text and text.strip())
    has_file = bool(fil)

    # Same usage message as `predict`, returned before touching the model.
    if not (has_text or has_file):
        return "Either enter text or upload .csv or .xlsx file.!", None, None

    sentiment = None
    df = None
    fig = None

    if has_file:
        # Accept both an upload object (path in ``.name``) and a bare path.
        path = str(getattr(fil, "name", fil))
        lowered = path.lower()
        if lowered.endswith('.csv'):
            df = pd.read_csv(path)
        elif lowered.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(path)
        else:
            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

        if 'tweet' not in df.columns:
            raise ValueError("The uploaded file must contain a 'tweet' column.")

        # Missing cells become empty strings so normalizeTweet always gets str.
        tweets = [normalizeTweet(t) for t in df['tweet'].fillna("").astype(str)]
        results = pipe2(tweets) if tweets else []

        # Assign by position: this overwrites any existing label/score
        # columns instead of producing duplicate column names.
        df['label'] = [item['label'] for item in results]
        df['score'] = [item['score'] for item in results]

        if len(df) > 0:
            # Build the figure without pyplot so it is not kept alive in
            # pyplot's global registry after the request finishes.
            from matplotlib.figure import Figure

            fig = Figure()
            ax = fig.subplots()
            sns.countplot(x='label', data=df, palette='viridis', ax=ax)
            ax.set_title('Result: Count Plot')
            ax.set_xlabel('label')
            ax.set_ylabel('Count')

    if has_text:
        prediction = pipe2([normalizeTweet(text)])[0]
        print(prediction["label"])
        sentiment = prediction['label']

    return sentiment, df, fig

# Components and interface for the task-B classifier tab.
file_path = gr.File(label="Upload a File")
label = gr.Label(label="Labels")
classificationB = gr.Interface(
    classifyB,
    inputs=[gr.Text(label="Input"), file_path],
    outputs=[
        label,
        gr.DataFrame(headers=['Tweets', 'Label', "Score"], wrap=True),
        gr.Plot(label="Sarcasm classifier"),
    ],
    title="Sarcasm Classifier",
    theme='dark',
)

# Combine both interfaces into one tabbed app and start it with a share link.
main = gr.TabbedInterface(
    [demo, classificationB],
    ['Analysizer', 'Classifier'],
    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
)

main.launch(share=True)