PirateXX committed on
Commit
fd64511
·
1 Parent(s): 44091f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -14,7 +14,8 @@ model_name = "roberta-base"
14
  tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
15
 
16
  def text_to_sentences(text):
17
- return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', text)
 
18
 
19
  # function to concatenate sentences into chunks of size 900 or less
20
  def chunks_of_900(text, chunk_size=900):
 
14
  tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
15
 
16
  def text_to_sentences(text):
17
+ clean_text = text.replace('\n', ' ')
18
+ return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
19
 
20
  # function to concatenate sentences into chunks of size 900 or less
21
  def chunks_of_900(text, chunk_size=900):