TuanScientist committed on
Commit
2618489
1 Parent(s): 110358f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -0
app.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import RobertaForSequenceClassification, AutoTokenizer
2
+ import torch
3
+ import docx2txt
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ import openpyxl
7
+ from openpyxl.styles import Font, Color, PatternFill
8
+ from openpyxl.styles.colors import WHITE
9
+ import gradio as gr
10
+ import underthesea
11
+
12
# Load the pretrained sentiment classifier and its matching (slow) tokenizer
# once, at import time, from the same checkpoint identifier.
_SENTIMENT_CHECKPOINT = "wonrax/phobert-base-vietnamese-sentiment"
senti_model = RobertaForSequenceClassification.from_pretrained(_SENTIMENT_CHECKPOINT)
senti_tokenizer = AutoTokenizer.from_pretrained(_SENTIMENT_CHECKPOINT, use_fast=False)
15
+
16
+
17
+
18
+ # Word segmented
19
def segmentation(sentences):
    """Word-segment each sentence so it can be fed to the sentiment model.

    Args:
        sentences: Iterable of raw sentence strings.

    Returns:
        list[str]: One segmented string per input sentence, in input order.
        Tokens are separated by single spaces and the syllables of a
        multi-syllable word are joined with underscores.
    """
    # With the default list output, a multi-syllable word comes back as one
    # token that still contains spaces, so re-joining the tokens with ' '
    # throws the word boundaries away. format="text" keeps them by joining
    # the syllables of each word with '_', which is the word-segmented form
    # PhoBERT-based checkpoints are trained on.
    return [
        underthesea.word_tokenize(sentence, format="text")
        for sentence in sentences
    ]
25
+
26
+
27
+ # File read
28
def read_file(docx):
    """Return the non-empty, whitespace-stripped lines of a .docx document.

    Args:
        docx: Document location, passed unchanged to ``docx2txt.process``.

    Returns:
        list[str]: The document text split on newlines, each line stripped,
        blank lines dropped. If the document cannot be read, the error is
        printed and an empty list is returned, so callers can always iterate
        over the result (previously the failure path returned ``None``).
    """
    # Only the extraction itself is expected to fail; keep the try body small.
    try:
        text = docx2txt.process(docx)
    except Exception as e:
        print(f"Error reading file: {e}")
        return []

    stripped_lines = (line.strip() for line in text.split('\n'))
    return [line for line in stripped_lines if line]
37
+
38
+
39
+ # Define a function to analyze the sentiment of a text
40
def analyze(sentence, max_length=256):
    """Return the sentiment class probabilities for one sentence.

    Args:
        sentence: Text to classify (expected to be word-segmented already).
        max_length: Maximum number of token ids, special tokens included,
            passed to the model. Longer inputs are truncated. The default
            of 256 matches the sequence limit of PhoBERT-base models.

    Returns:
        list[float]: Softmax probabilities over the model's classes for this
        sentence, in the model's label order.
    """
    # Without truncation a long paragraph yields more token ids than the
    # model has position embeddings for, and the forward pass raises.
    token_ids = senti_tokenizer.encode(
        sentence, truncation=True, max_length=max_length
    )
    input_ids = torch.tensor([token_ids])  # batch of one sequence

    with torch.no_grad():
        out = senti_model(input_ids)

    # Single-item batch: return the probabilities of its only row.
    return out.logits.softmax(dim=-1).tolist()[0]
46
+
47
+
48
def file_analysis(docx):
    """Score the sentiment of every line of text in a document.

    The document is read with ``read_file``, each line is word-segmented
    with ``segmentation``, and each segmented line is scored with
    ``analyze``.

    Args:
        docx: Document location, forwarded to ``read_file``.

    Returns:
        list: One ``analyze`` result per line, in document order.
    """
    prepared_lines = segmentation(read_file(docx))
    return [analyze(text) for text in prepared_lines]
59
+
60
+
61
def generate_pie_chart(df):
    """Draw a pie chart of the mean score per sentiment and save it as a PNG.

    Args:
        df: DataFrame with numeric 'Negative', 'Positive' and 'Neutral'
            columns.

    Returns:
        str: The file name the chart was saved under ('pie_chart.png',
        a relative path).
    """
    sentiments = ['Negative', 'Positive', 'Neutral']
    # One colour per sentiment, in the same order as ``sentiments``.
    palette = ['#BDBDBD', '#9ACD32', '#87CEFA']

    # Column means, gathered into a small two-column summary table.
    summary = pd.DataFrame({
        'Sentiment': sentiments,
        'Score': [df[name].mean() for name in sentiments],
    })

    plt.pie(
        summary['Score'],
        labels=summary['Sentiment'],
        colors=palette,
        autopct='%1.1f%%',
    )
    plt.title('Average Scores by Sentiment')

    # Write the figure to disk, then close it so figures do not accumulate.
    pie_chart_name = 'pie_chart.png'
    plt.savefig(pie_chart_name)
    plt.close()
    return pie_chart_name
84
+
85
+
86
def generate_excel_file(df):
    """Write the per-sentence scores to a colour-coded Excel workbook.

    Row 1 holds bold headers. Each following row holds one DataFrame row.
    Within a row, the score cell(s) equal to the row maximum and the text
    cell are filled with the colour of the winning sentiment.

    Args:
        df: DataFrame whose columns are, in order, 'Negative', 'Positive',
            'Neutral' and 'Text'.

    Returns:
        str: The file name of the saved workbook ('result.xlsx', a relative
        path).
    """
    sentiment_cols = ['Negative', 'Positive', 'Neutral']
    headers = sentiment_cols + ['Text']
    text_col = len(headers)  # 1-based worksheet column holding the text

    wb = openpyxl.Workbook()
    ws = wb.active

    # Header row.
    for col_num, header in enumerate(headers, 1):
        cell = ws.cell(row=1, column=col_num)
        cell.value = header
        cell.font = Font(bold=True)

    # Background fill per sentiment.
    fill_dict = {
        'Negative': PatternFill(start_color='BDBDBD', end_color='BDBDBD', fill_type='solid'),
        'Positive': PatternFill(start_color='9ACD32', end_color='9ACD32', fill_type='solid'),
        'Neutral': PatternFill(start_color='87CEFA', end_color='87CEFA', fill_type='solid'),
    }

    # Rows are placed by position (starting under the header) rather than by
    # index label, so a non-default DataFrame index cannot misplace them.
    for row_num, (_, row_data) in enumerate(df.iterrows(), start=2):
        # Winning sentiment for this row; ties resolve to the first maximum.
        scores = [row_data[col] for col in sentiment_cols]
        max_score = max(scores)
        sentiment = sentiment_cols[scores.index(max_score)]
        fill = fill_dict[sentiment]

        # Decide the text colour from the sentiment itself. Comparing
        # ``fill.start_color.rgb`` with 'BDBDBD' never matched, because
        # openpyxl stores 6-digit colours as 8-digit aRGB ('00BDBDBD'), so
        # the white font for negative rows was never applied.
        # NOTE(review): white on light grey has low contrast - confirm this
        # is the desired look.
        font_color = WHITE if sentiment == 'Negative' else Color('000000')

        for col_num, col_data in enumerate(row_data, 1):
            cell = ws.cell(row=row_num, column=col_num)
            cell.value = col_data
            if col_num == text_col:
                cell.fill = fill
                cell.font = Font(color=font_color)
            elif col_num < text_col and col_data == max_score:
                # Highlight the winning score cell(s).
                cell.fill = fill

    excel_file_path = 'result.xlsx'
    wb.save(excel_file_path)
    return excel_file_path
134
+
135
+
136
def process_file(docx):
    """Analyse a document and produce its chart and spreadsheet reports.

    Args:
        docx: Document location, forwarded to ``file_analysis`` and
            ``read_file``.

    Returns:
        tuple: ``(pie_chart_name, excel_file_path)`` as returned by
        ``generate_pie_chart`` and ``generate_excel_file``.
    """
    scores = file_analysis(docx)

    # One row per line of text: three score columns plus the original text.
    table = pd.DataFrame(scores, columns=['Negative', 'Positive', 'Neutral'])
    table['Text'] = read_file(docx)

    chart_path = generate_pie_chart(table)
    workbook_path = generate_excel_file(table)
    return chart_path, workbook_path
149
+
150
def analyze_file(docx_file):
    """Gradio callback: analyse the uploaded file and return both reports.

    Args:
        docx_file: Uploaded file object; its ``name`` attribute is used as
            the document location.

    Returns:
        tuple: ``(excel_file_path, pie_chart_name)`` - note the order is the
        reverse of what ``process_file`` returns, matching the order of the
        interface outputs (file first, image second).
    """
    chart_path, workbook_path = process_file(docx_file.name)
    return workbook_path, chart_path
156
+
157
# Gradio UI: one uploaded document in; the Excel report and the pie-chart
# image out (in the order returned by ``analyze_file``).
# Components come from the top-level ``gr`` namespace: the legacy
# ``gr.inputs`` / ``gr.outputs`` modules are deprecated in Gradio 3 and
# removed in Gradio 4.
inputs = gr.File(label="Chọn Tệp Bạn Muốn Phân Tích")
outputs = [
    gr.File(label="Kết Quả Phân Tích Excel"),
    gr.Image(type="filepath", label="Thông Số Phân Tích"),
]

interface = gr.Interface(
    fn=analyze_file,
    inputs=inputs,
    outputs=outputs,
    title="Sentiment Analysis",
    allow_flagging="never",  # Disable flag button
)

if __name__ == "__main__":
    interface.launch(share=True)