madhavkotecha committed on
Commit
36e70ac
·
verified ·
1 Parent(s): dd446cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -5
app.py CHANGED
@@ -30,6 +30,13 @@ class CRF_POS_Tagger:
30
  "ness", "ous", "ship", "y", "es", "s"
31
  ]
32
 
 
 
 
 
 
 
 
33
  self.prefix_pattern = f"^({'|'.join(self.prefixes)})"
34
  self.suffix_pattern = f"({'|'.join(self.suffixes)})$"
35
 
@@ -42,7 +49,7 @@ class CRF_POS_Tagger:
42
  self.X_test = self.X[self.split:]
43
  self.y_test = self.y[self.split:]
44
  self.crf_model = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
45
- self.train()
46
 
47
  def word_splitter(self, word):
48
  prefix = ""
@@ -148,8 +155,9 @@ class CRF_POS_Tagger:
148
 
149
  return features
150
 
151
- def train(self):
152
- self.crf_model.fit(self.X_train, self.y_train)
 
153
 
154
  def predict(self, X_test):
155
  return self.crf_model.predict(X_test)
@@ -161,7 +169,8 @@ class CRF_POS_Tagger:
161
  self.predicted_tag.extend([item for sublist in y_pred for item in sublist])
162
  return metrics.flat_accuracy_score(y_test, y_pred)
163
 
164
- def cross_validation(self, data):
 
165
  accuracies = []
166
  for i in range(5):
167
  n1 = int(i / 5.0 * len(data))
@@ -218,7 +227,7 @@ class CRF_POS_Tagger:
218
 
219
  def tagging(self, input):
220
  sentence = (re.sub(r'(\S)([.,;:!?])', r'\1 \2', input.strip())).split()
221
- sentence_list = [[word] for word in sentence]
222
  features = [self.word_features(sentence_list, i) for i in range(len(sentence_list))]
223
 
224
  predicted_tags = self.crf_model.predict([features])
@@ -226,6 +235,14 @@ class CRF_POS_Tagger:
226
  return output
227
 
228
  tagger = CRF_POS_Tagger()
 
 
 
 
 
 
 
 
229
  interface = gr.Interface(fn = tagger.tagging,
230
  inputs = gr.Textbox(
231
  label="Input Sentence",
 
30
  "ness", "ous", "ship", "y", "es", "s"
31
  ]
32
 
33
+ self.interjections = {
34
+ 'ah', 'alas', 'aha', 'bravo', 'darn', 'drat', 'eh', 'eek', 'eww',
35
+ 'gosh', 'ha', 'hooray', 'hmm', 'huh', 'oops', 'ouch', 'phew',
36
+ 'pow', 'yay', 'whoa', 'wow', 'yikes', 'yippee', 'uh', 'um',
37
+ 'hey', 'hello'
38
+ }
39
+
40
  self.prefix_pattern = f"^({'|'.join(self.prefixes)})"
41
  self.suffix_pattern = f"({'|'.join(self.suffixes)})$"
42
 
 
49
  self.X_test = self.X[self.split:]
50
  self.y_test = self.y[self.split:]
51
  self.crf_model = sklearn_crfsuite.CRF(algorithm='lbfgs', c1=0.1, c2=0.1, max_iterations=100, all_possible_transitions=True)
52
+ # self.train()
53
 
54
  def word_splitter(self, word):
55
  prefix = ""
 
155
 
156
  return features
157
 
158
+ def train(self, data=None):
159
+ X_train, y_train = zip(*data) if data else self.X_train, self.y_train
160
+ self.crf_model.fit(X_train, y_train)
161
 
162
  def predict(self, X_test):
163
  return self.crf_model.predict(X_test)
 
169
  self.predicted_tag.extend([item for sublist in y_pred for item in sublist])
170
  return metrics.flat_accuracy_score(y_test, y_pred)
171
 
172
+ def cross_validation(self):
173
+ data = list(zip(self.X, self.y))
174
  accuracies = []
175
  for i in range(5):
176
  n1 = int(i / 5.0 * len(data))
 
227
 
228
  def tagging(self, input):
229
  sentence = (re.sub(r'(\S)([.,;:!?])', r'\1 \2', input.strip())).split()
230
+ sentence_list = [[word.lower()] for word in sentence]
231
  features = [self.word_features(sentence_list, i) for i in range(len(sentence_list))]
232
 
233
  predicted_tags = self.crf_model.predict([features])
 
235
  return output
236
 
237
  tagger = CRF_POS_Tagger()
238
+
239
+ accuracies, avg_accuracy = tagger.cross_validation()
240
+ print(f"Cross-Validation Accuracies: {accuracies}")
241
+ print(f"Average Accuracy: {avg_accuracy}")
242
+
243
+ conf_matrix = tagger.con_matrix()
244
+ print(tagger.per_pos_accuracy(conf_matrix))
245
+
246
  interface = gr.Interface(fn = tagger.tagging,
247
  inputs = gr.Textbox(
248
  label="Input Sentence",