Spaces:

TuanScientist
/

Sentiment2

Running

TuanScientist committed on Jul 19, 2024

Commit

ce26f8e

verified ·

1 Parent(s): bd64970

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,17 +8,19 @@ from openpyxl.styles import Font, Color, PatternFill
 from openpyxl.styles.colors import WHITE
 import gradio as gr
 import underthesea
 # Load the model and tokenizer
 senti_model = RobertaForSequenceClassification.from_pretrained("wonrax/phobert-base-vietnamese-sentiment")
 senti_tokenizer = AutoTokenizer.from_pretrained("wonrax/phobert-base-vietnamese-sentiment", use_fast=False)
 def segmentation(text):
-    sentences = text.split('.')
     segmented_sentences = []
     for sentence in sentences:
         sentence = sentence.strip()
-        if sentence:  # ignore empty sentences
             segmented_sentence = underthesea.word_tokenize(sentence)
             segmented_sentences.append(' '.join(segmented_sentence))
     return segmented_sentences
@@ -153,6 +155,12 @@ def generate_excel_file(df):
     return excel_file_path
 inputs = [
     gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
     gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")

 from openpyxl.styles.colors import WHITE
 import gradio as gr
 import underthesea
+import re
 # Load the model and tokenizer
 senti_model = RobertaForSequenceClassification.from_pretrained("wonrax/phobert-base-vietnamese-sentiment")
 senti_tokenizer = AutoTokenizer.from_pretrained("wonrax/phobert-base-vietnamese-sentiment", use_fast=False)
 def segmentation(text):
+    # Split text by periods and newlines
+    sentences = re.split(r'[.\n]', text)
     segmented_sentences = []
     for sentence in sentences:
         sentence = sentence.strip()
+        if sentence:  # Ignore empty sentences
             segmented_sentence = underthesea.word_tokenize(sentence)
             segmented_sentences.append(' '.join(segmented_sentence))
     return segmented_sentences
     return excel_file_path
+def analyze_from_text(text):
+    return analyze_text(text, None)
+def analyze_from_file(docx_file):
+    return analyze_text(None, docx_file)
 inputs = [
     gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
     gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")