Spaces:

seek007
/

external

Sleeping

App Files Files Community

external / app.py

seek007

Update app.py

5c77412 verified about 2 months ago

raw

history blame contribute delete

5.39 kB

	# -*- coding: utf-8 -*-
	"""

	Developed by Abdul S.
	FA20-BCS-OO1 final app.ipynb


	Automatically generated by Colab
	"""

	import pandas as pd
	import numpy as np
	import gradio as gr
	from TweetNormalizer import normalizeTweet
	import seaborn as sns
	import matplotlib.pyplot as plt
	from transformers import pipeline

	# Format floats in displayed DataFrames with two digits after the decimal point.
	pd.options.display.float_format = '{:.2f}'.format

	# Task-A pipeline: the same Hub repository id supplies both the model and
	# its tokenizer.
	TASK_A_REPO = "seek007/taskA-DeBERTa-large-1.0.0"
	pipe = pipeline(model=TASK_A_REPO, tokenizer=TASK_A_REPO)

	# An earlier revision loaded a pickled pipeline from Google Drive instead:
	# pipe = joblib.load('/content/drive/MyDrive/FYPpkl models/pipeA-wTok-0.0.1.pkl')



	#
	def predict(text=None, fil=None):
	    """Label a typed tweet and/or every tweet of an uploaded file with ``pipe``.

	    Args:
	        text: Tweet entered by the user. ``None``, an empty string and a
	            whitespace-only string all count as "no text", so a cleared text
	            box behaves the same as a missing argument.
	        fil: Uploaded file, or ``None``. Either an object whose ``.name`` is
	            the file path or the path itself. The first column of a
	            ``.csv``, ``.xlsx`` or ``.xls`` file is read as the tweets, with
	            no header row.

	    Returns:
	        A ``(sentiment, df, fig)`` tuple.

	        * ``sentiment`` -- ``"Sarcastic"`` / ``"Non Sarcastic"`` for typed
	          text; otherwise the raw label of the first file row; or the help
	          message when neither input was supplied.
	        * ``df`` -- the file's tweets with a ``label`` column, or a one-row
	          frame for typed text when no file was given, or ``None``.
	        * ``fig`` -- pie chart of sarcastic vs non-sarcastic file rows, or
	          ``None`` when no file was given or no row carried either label.

	    Raises:
	        ValueError: If the uploaded file is neither CSV nor Excel.
	    """
	    sentiment = None
	    df = None
	    fig = None

	    # Blank text must not reach the model, and must not hide the file result.
	    has_text = text is not None and str(text).strip() != ""

	    if not has_text and not fil:
	        return "Either enter text or upload .csv or .xlsx file.!", df, fig

	    if fil:
	        # Accept both file-like objects carrying ``.name`` and plain paths.
	        path = getattr(fil, "name", fil)
	        lowered = str(path).lower()
	        if lowered.endswith('.csv'):
	            df = pd.read_csv(path, header=None, names=['tweet'], usecols=[0])
	        elif lowered.endswith(('.xlsx', '.xls')):
	            df = pd.read_excel(path, header=None, names=['tweet'], usecols=[0])
	        else:
	            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

	        # Blank cells load as NaN; drop them and renumber so row 0 exists and
	        # the labels line up positionally with the remaining tweets.
	        df = df.dropna(subset=['tweet']).reset_index(drop=True)

	        # NOTE(review): normalizeTweet presumably expects a str, so numeric
	        # cells are converted first -- confirm against TweetNormalizer.
	        cleaned = [normalizeTweet(str(tweet)) for tweet in df['tweet']]
	        results = pipe.predict(cleaned) if cleaned else []
	        df['label'] = [result['label'] for result in results]

	        sarcastic_count = int((df['label'] == 'sarcastic').sum())
	        non_sarcastic_count = int((df['label'] == 'non_sarcastic').sum())

	        # A pie chart of two zero-sized slices is undefined, so skip it.
	        if sarcastic_count + non_sarcastic_count > 0:
	            sns.set_style("whitegrid")
	            fig, ax = plt.subplots()
	            ax.pie(
	                [sarcastic_count, non_sarcastic_count],
	                explode=(0.1, 0),  # pull the first slice out
	                labels=['Sarcastic', 'Non-Sarcastic'],
	                autopct='%1.1f%%',
	                shadow=True,
	                startangle=140,
	            )
	            ax.axis('equal')  # equal aspect ratio draws the pie as a circle
	            # Title is set on this Axes rather than on pyplot's "current" one.
	            ax.set_title('Sarcastic vs Non-Sarcastic Tweets')

	        if not has_text and not df.empty:
	            sentiment = df['label'].iloc[0]

	    if has_text:
	        prediction = pipe.predict([normalizeTweet(text)])[0]
	        print(prediction)

	        sentiment = "Sarcastic" if prediction['label'] == 'sarcastic' else "Non Sarcastic"
	        if df is None:
	            df = pd.DataFrame([{'tweet': text, 'label': sentiment}])

	    return sentiment, df, fig





	# Detector tab: a text box and a file upload feed predict(); its three
	# return values fill a label, a table and a plot.
	file_path = gr.File(label="Upload a File")
	output = gr.Label(num_top_classes=2, label="Predicted Labels")
	detector_inputs = [gr.Text(label="Input"), file_path]
	detector_outputs = [
	    output,
	    gr.DataFrame(headers=['Tweets', 'Labels'], wrap=True),
	    gr.Plot(label="Sarcasm Predictor"),
	]
	detector = gr.Interface(
	    fn=predict,
	    inputs=detector_inputs,
	    outputs=detector_outputs,
	    title="Sarcasm Predictor",
	)

	# demo.launch(debug=True)


	# Task-B classifier pipeline: model and tokenizer come from the same Hub
	# repository id.
	TASK_B_REPO = "seek007/taskB-bertweet-base-trainer-1.0.0"
	pipe2 = pipeline(model=TASK_B_REPO, tokenizer=TASK_B_REPO)


	# classifier
	def classifyB(text=None, fil=None):
	    """Run the task-B pipeline (``pipe2``) on typed text and/or an uploaded file.

	    Args:
	        text: Tweet entered by the user. ``None``, an empty string and a
	            whitespace-only string all count as "no text", so a cleared text
	            box behaves the same as a missing argument.
	        fil: Uploaded file, or ``None``. Either an object whose ``.name`` is
	            the file path or the path itself. The first column of a
	            ``.csv``, ``.xlsx`` or ``.xls`` file is read as the tweets, with
	            no header row.

	    Returns:
	        A ``(sentiment, df, fig)`` tuple.

	        * ``sentiment`` -- the label predicted for the typed text; otherwise
	          the label of the first file row; or the help message when neither
	          input was supplied.
	        * ``df`` -- the file's tweets with ``label`` and ``score`` columns,
	          or a one-row frame for typed text when no file was given, or
	          ``None``.
	        * ``fig`` -- count plot of the file's labels, or ``None`` when no
	          file was given or it held no usable rows.

	    Raises:
	        ValueError: If the uploaded file is neither CSV nor Excel.
	    """
	    sentiment = None
	    df = None
	    fig = None

	    # Blank text must not reach the model, and must not hide the file result.
	    has_text = text is not None and str(text).strip() != ""

	    if not has_text and not fil:
	        return "Either enter text or upload .csv or .xlsx file.!", df, fig

	    # NOTE(review): the previous version listed the expected categories as
	    # 'sarcasm', 'irony', 'Staire', 'understatement', 'overstatement' and
	    # 'rhetorical question' but never used that list; the labels below come
	    # straight from the model output -- confirm they match.

	    if fil:
	        # Accept both file-like objects carrying ``.name`` and plain paths.
	        path = getattr(fil, "name", fil)
	        lowered = str(path).lower()
	        if lowered.endswith('.csv'):
	            df = pd.read_csv(path, header=None, names=['tweet'], usecols=[0])
	        elif lowered.endswith(('.xlsx', '.xls')):
	            df = pd.read_excel(path, header=None, names=['tweet'], usecols=[0])
	        else:
	            raise ValueError("Unsupported file type. Please upload a CSV or Excel file.")

	        # Blank cells load as NaN; drop them and renumber so row 0 exists and
	        # the predictions line up positionally with the remaining tweets.
	        df = df.dropna(subset=['tweet']).reset_index(drop=True)

	        # NOTE(review): normalizeTweet presumably expects a str, so numeric
	        # cells are converted first -- confirm against TweetNormalizer.
	        cleaned = [normalizeTweet(str(tweet)) for tweet in df['tweet']]
	        results = pipe2(cleaned) if cleaned else []
	        df['label'] = [result['label'] for result in results]
	        df['score'] = [result['score'] for result in results]

	        if not df.empty:
	            # Draw on an explicit Axes instead of pyplot's "current" one.
	            fig, ax = plt.subplots()
	            sns.countplot(x='label', data=df, palette='viridis', ax=ax)
	            ax.set_title('Result: Count Plot')
	            ax.set_xlabel('label')
	            ax.set_ylabel('Count')

	            if not has_text:
	                sentiment = df['label'].iloc[0]

	    if has_text:
	        prediction = pipe2([normalizeTweet(text)])[0]
	        sentiment = prediction['label']
	        if df is None:
	            df = pd.DataFrame(
	                [{'tweet': text, 'label': sentiment, "score": prediction['score']}]
	            )

	    return sentiment, df, fig



	# Classifier tab: a text box and a file upload feed classifyB(); its three
	# return values fill a label, a table and a plot.
	file_path = gr.File(label="Upload a File")
	label = gr.Label(label="Labels")
	classifier_inputs = [gr.Text(label="Input"), file_path]
	classifier_outputs = [
	    label,
	    gr.DataFrame(headers=['Tweets', 'Label', "Score"], wrap=True),
	    gr.Plot(label="Sarcasm classifier"),
	]
	classifier = gr.Interface(
	    classifyB,
	    inputs=classifier_inputs,
	    outputs=classifier_outputs,
	    title="Sarcasm Classifier",
	)  # ,theme= 'darkhuggingface'

	# Present both interfaces as tabs of one app, then start serving it with a
	# public share link requested.
	tab_names = ['Analysizer', 'Classifier']
	main = gr.TabbedInterface(
	    [detector, classifier],
	    tab_names,
	    title="Sarcasm Predictor: An Optimized Sentiment Analysis system",
	)

	main.launch(share=True)