TuanScientist committed on
Commit
ce26f8e
1 Parent(s): bd64970

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -2
app.py CHANGED
@@ -8,17 +8,19 @@ from openpyxl.styles import Font, Color, PatternFill
8
  from openpyxl.styles.colors import WHITE
9
  import gradio as gr
10
  import underthesea
 
11
 
12
  # Load the model and tokenizer
13
  senti_model = RobertaForSequenceClassification.from_pretrained("wonrax/phobert-base-vietnamese-sentiment")
14
  senti_tokenizer = AutoTokenizer.from_pretrained("wonrax/phobert-base-vietnamese-sentiment", use_fast=False)
15
 
16
  def segmentation(text):
17
- sentences = text.split('.')
 
18
  segmented_sentences = []
19
  for sentence in sentences:
20
  sentence = sentence.strip()
21
- if sentence: # ignore empty sentences
22
  segmented_sentence = underthesea.word_tokenize(sentence)
23
  segmented_sentences.append(' '.join(segmented_sentence))
24
  return segmented_sentences
@@ -153,6 +155,12 @@ def generate_excel_file(df):
153
 
154
  return excel_file_path
155
 
 
 
 
 
 
 
156
  inputs = [
157
  gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
158
  gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")
 
8
  from openpyxl.styles.colors import WHITE
9
  import gradio as gr
10
  import underthesea
11
+ import re
12
 
13
  # Load the model and tokenizer
14
  senti_model = RobertaForSequenceClassification.from_pretrained("wonrax/phobert-base-vietnamese-sentiment")
15
  senti_tokenizer = AutoTokenizer.from_pretrained("wonrax/phobert-base-vietnamese-sentiment", use_fast=False)
16
 
17
  def segmentation(text):
18
+ # Split text by periods and newlines
19
+ sentences = re.split(r'[.\n]', text)
20
  segmented_sentences = []
21
  for sentence in sentences:
22
  sentence = sentence.strip()
23
+ if sentence: # Ignore empty sentences
24
  segmented_sentence = underthesea.word_tokenize(sentence)
25
  segmented_sentences.append(' '.join(segmented_sentence))
26
  return segmented_sentences
 
155
 
156
  return excel_file_path
157
 
158
def analyze_from_text(text):
    """Run the analysis on typed text only.

    Forwards *text* as the first argument of ``analyze_text`` and ``None``
    as the second, and returns whatever ``analyze_text`` returns.
    """
    no_file = None
    return analyze_text(text, no_file)
160
+
161
def analyze_from_file(docx_file):
    """Run the analysis on an uploaded file only.

    Forwards ``None`` as the first argument of ``analyze_text`` and
    *docx_file* as the second, and returns whatever ``analyze_text``
    returns.
    """
    no_text = None
    return analyze_text(no_text, docx_file)
163
+
164
  inputs = [
165
  gr.Textbox(label="Nhập Văn Bản bằng Tiếng Việt để trải nghiệm ngay"),
166
  gr.File(label="Chọn Tệp File Word(docx) Bạn Muốn Phân Tích")