Spaces:
Running
Running
TuanScientist
committed on
Commit
•
ce26f8e
1
Parent(s):
bd64970
Update app.py
Browse files
app.py
CHANGED
@@ -8,17 +8,19 @@ from openpyxl.styles import Font, Color, PatternFill
|
|
8 |
from openpyxl.styles.colors import WHITE
|
9 |
import gradio as gr
|
10 |
import underthesea
|
|
|
11 |
|
12 |
# Load the model and tokenizer
|
13 |
senti_model = RobertaForSequenceClassification.from_pretrained("wonrax/phobert-base-vietnamese-sentiment")
|
14 |
senti_tokenizer = AutoTokenizer.from_pretrained("wonrax/phobert-base-vietnamese-sentiment", use_fast=False)
|
15 |
|
16 |
def segmentation(text):
|
17 |
-
|
|
|
18 |
segmented_sentences = []
|
19 |
for sentence in sentences:
|
20 |
sentence = sentence.strip()
|
21 |
-
if sentence: #
|
22 |
segmented_sentence = underthesea.word_tokenize(sentence)
|
23 |
segmented_sentences.append(' '.join(segmented_sentence))
|
24 |
return segmented_sentences
|
@@ -153,6 +155,12 @@ def generate_excel_file(df):
|
|
153 |
|
154 |
return excel_file_path
|
155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
inputs = [
|
157 |
gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
|
158 |
gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")
|
|
|
8 |
from openpyxl.styles.colors import WHITE
|
9 |
import gradio as gr
|
10 |
import underthesea
|
11 |
+
import re
|
12 |
|
13 |
# Load the sentiment model and its tokenizer from the same checkpoint id.
_SENTI_CHECKPOINT = "wonrax/phobert-base-vietnamese-sentiment"
senti_model = RobertaForSequenceClassification.from_pretrained(_SENTI_CHECKPOINT)
senti_tokenizer = AutoTokenizer.from_pretrained(
    _SENTI_CHECKPOINT,
    use_fast=False,
)
|
16 |
|
17 |
def segmentation(text):
    """Split text into sentences and word-segment each one.

    The text is cut at newlines and at sentence-ending periods. Each piece
    is stripped of surrounding whitespace and blank pieces are dropped.
    Every remaining sentence is passed to ``underthesea.word_tokenize`` and
    its tokens are re-joined with single spaces.

    Args:
        text: Raw input text. ``None`` and the empty string are accepted.

    Returns:
        A list of word-segmented sentence strings in input order; an empty
        list when there is nothing to segment.
    """
    if not text:
        # re.split raises TypeError on None; "no text" means "no sentences".
        return []

    # A period with a digit on both sides belongs to a number ("3.5",
    # "1.000.000"), not to a sentence boundary, so it must not split.
    pieces = re.split(r'\n|(?<!\d)\.|\.(?!\d)', text)
    stripped = (piece.strip() for piece in pieces)
    return [
        ' '.join(underthesea.word_tokenize(sentence))
        for sentence in stripped
        if sentence  # Ignore empty sentences
    ]
|
|
|
155 |
|
156 |
return excel_file_path
|
157 |
|
158 |
+
def analyze_from_text(text):
    """Run ``analyze_text`` on ``text`` alone.

    ``None`` is passed as the second argument of ``analyze_text``, and
    whatever ``analyze_text`` returns is returned unchanged.
    """
    result = analyze_text(text, None)
    return result
|
160 |
+
|
161 |
+
def analyze_from_file(docx_file):
    """Run ``analyze_text`` on ``docx_file`` alone.

    ``None`` is passed as the first argument of ``analyze_text`` and
    ``docx_file`` as the second; whatever ``analyze_text`` returns is
    returned unchanged.
    """
    result = analyze_text(None, docx_file)
    return result
|
163 |
+
|
164 |
inputs = [
|
165 |
gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
|
166 |
gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")
|