Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -14,7 +14,8 @@ model_name = "roberta-base"
|
|
14 |
tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
|
15 |
|
16 |
def text_to_sentences(text):
|
17 |
-
|
|
|
18 |
|
19 |
# function to concatenate sentences into chunks of size 900 or less
|
20 |
def chunks_of_900(text, chunk_size=900):
|
|
|
14 |
tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
|
15 |
|
16 |
def text_to_sentences(text):
|
17 |
+
clean_text = text.replace('\n', ' ')
|
18 |
+
return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
|
19 |
|
20 |
# function to concatenate sentences into chunks of size 900 or less
|
21 |
def chunks_of_900(text, chunk_size=900):
|