Larisa Kolesnichenko committed on
Commit
099a2f3
1 Parent(s): a0be2a1

Make processing of punctuation consistent with train data: frame each symbol with spaces

Browse files
Files changed (1) hide show
  1. model_wrapper.py +5 -1
model_wrapper.py CHANGED
@@ -4,6 +4,7 @@ import tempfile
4
  import sys
5
  import datetime
6
  import re
 
7
  sys.path.append('mtool')
8
 
9
  import torch
@@ -78,7 +79,10 @@ class PredictionModel:
78
 
79
 
80
  def clean_texts(self, texts):
81
- return [re.sub(r' +', ' ', t) for t in texts]
 
 
 
82
 
83
 
84
  def _predict_to_mrp(self, texts, graph_mode='labeled-edge'):
 
4
  import sys
5
  import datetime
6
  import re
7
+ import string
8
  sys.path.append('mtool')
9
 
10
  import torch
 
79
 
80
 
81
  def clean_texts(self, texts):
82
+ punctuation = ''.join([f'\\{s}' for s in string.punctuation])
83
+ texts = [re.sub(f'([{punctuation}])', ' \\1 ', t) for t in texts]
84
+ texts = [re.sub(r' +', ' ', t) for t in texts]
85
+ return texts
86
 
87
 
88
  def _predict_to_mrp(self, texts, graph_mode='labeled-edge'):