PirateXX committed on
Commit
1f54896
·
1 Parent(s): a102ee8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -14,11 +14,10 @@ model_name = "roberta-base"
14
  tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
15
 
16
  def text_to_sentences(text):
17
- re.sub(r'(?<=[.!?])(?=[^\s])', r' ', text)
18
- return re.split(r'[.!?]', text)
19
 
20
  # function to concatenate sentences into chunks of size 900 or less
21
- def chunks_of_600(text, chunk_size=600):
22
  sentences = text_to_sentences(text)
23
  chunks = []
24
  current_chunk = ""
@@ -47,7 +46,7 @@ def predict(query, device="cpu"):
47
  return real
48
 
49
  def findRealProb(text):
50
- chunksOfText = (chunks_of_600(text))
51
  results = []
52
  for chunk in chunksOfText:
53
  output = predict(chunk)
 
14
  tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
15
 
16
  def text_to_sentences(text):
17
+ return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', text)
 
18
 
19
  # function to concatenate sentences into chunks of size 900 or less
20
+ def chunks_of_900(text, chunk_size=900):
21
  sentences = text_to_sentences(text)
22
  chunks = []
23
  current_chunk = ""
 
46
  return real
47
 
48
  def findRealProb(text):
49
+ chunksOfText = (chunks_of_900(text))
50
  results = []
51
  for chunk in chunksOfText:
52
  output = predict(chunk)