carisackc committed on
Commit
a2c3ee0
·
1 Parent(s): 9941c35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -89
app.py CHANGED
@@ -167,97 +167,97 @@ def genEntities(ann, entity):
167
 
168
  ##======================== Start of NER Tagging ========================
169
  # ====== Old NER ======
170
- doc = nlp(str(original_text2))
171
- colors = { "DISEASE": "pink","CHEMICAL": "orange"}
172
- options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors}
173
- ent_html = displacy.render(doc, style="ent", options=options)
174
  # ====== End of Old NER ======
175
 
176
- # #lemmatizing the notes to capture all forms of negation(e.g., deny: denies, denying)
177
- # def lemmatize(note, nlp):
178
- # doc = nlp(note)
179
- # lemNote = [wd.lemma_ for wd in doc]
180
- # return " ".join(lemNote)
181
-
182
- # #function to modify options for displacy NER visualization
183
- # def get_entity_options():
184
- # entities = ["DISEASE", "CHEMICAL", "NEG_ENTITY"]
185
- # colors = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY":'white'}
186
- # options = {"ents": entities, "colors": colors}
187
- # return options
188
-
189
- # #adding a new pipeline component to identify negation
190
- # def neg_model(nlp_model):
191
- # nlp = spacy.load(nlp_model, disable = ['parser'])
192
- # # nlp.add_pipe(nlp.create_pipe('sentencizer'))
193
- # nlp.add_pipe('sentencizer')
194
- # # negex = Negex(nlp)
195
- # nlp.add_pipe(
196
- # "negex",
197
- # config={
198
- # "chunk_prefix": ["no"],
199
- # },
200
- # last=True)
201
- # return nlp
202
-
203
- # def negation_handling(nlp_model, note, neg_model):
204
- # results = []
205
- # nlp = neg_model(nlp_model)
206
- # note = note.split(".") #sentence tokenizing based on delimeter
207
- # note = [n.strip() for n in note] #removing extra spaces at the begining and end of sentence
208
- # for t in note:
209
- # doc = nlp(t)
210
- # for e in doc.ents:
211
- # rs = str(e._.negex)
212
- # if rs == "True":
213
- # results.append(e.text)
214
- # return results
215
-
216
- # #function to identify span objects of matched negative phrases from text
217
- # def match(nlp,terms,label):
218
- # patterns = [nlp.make_doc(text) for text in terms]
219
- # matcher = PhraseMatcher(nlp.vocab)
220
- # matcher.add(label, None, *patterns)
221
- # return matcher
222
-
223
- # #replacing the labels for identified negative entities
224
- # def overwrite_ent_lbl(matcher, doc):
225
- # matches = matcher(doc)
226
- # seen_tokens = set()
227
- # new_entities = []
228
- # entities = doc.ents
229
- # for match_id, start, end in matches:
230
- # if start not in seen_tokens and end - 1 not in seen_tokens:
231
- # new_entities.append(Span(doc, start, end, label=match_id))
232
- # entities = [e for e in entities if not (e.start < end and e.end > start)]
233
- # seen_tokens.update(range(start, end))
234
- # doc.ents = tuple(entities) + tuple(new_entities)
235
- # return doc
236
-
237
- # #deduplicate repeated entities
238
- # def dedupe(items):
239
- # seen = set()
240
- # for item in items:
241
- # item = str(item).strip()
242
- # if item not in seen:
243
- # yield item
244
- # seen.add(item)
245
-
246
- # lem_clinical_note= lemmatize(runtext, nlp0)
247
- # #creating a doc object using BC5CDR model
248
- # doc = nlp1(lem_clinical_note)
249
- # options = get_entity_options()
250
-
251
- # #list of negative concepts from clinical note identified by negspacy
252
- # results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)
253
-
254
- # matcher = match(nlp1, results0,"NEG_ENTITY")
255
-
256
- # #doc0: new doc object with added "NEG_ENTITY label"
257
- # doc0 = overwrite_ent_lbl(matcher,doc)
258
-
259
- # #visualizing identified Named Entities in clinical input text
260
- # ent_html = displacy.render(doc0, style='ent', options=options)
261
 
262
  ##======================== End of NER Tagging ========================
263
 
 
167
 
168
  ##======================== Start of NER Tagging ========================
169
  # ====== Old NER ======
170
+ # doc = nlp(str(original_text2))
171
+ # colors = { "DISEASE": "pink","CHEMICAL": "orange"}
172
+ # options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors}
173
+ # ent_html = displacy.render(doc, style="ent", options=options)
174
  # ====== End of Old NER ======
175
 
176
+ #lemmatizing the notes to capture all forms of negation(e.g., deny: denies, denying)
177
+ def lemmatize(note, nlp):
178
+ doc = nlp(note)
179
+ lemNote = [wd.lemma_ for wd in doc]
180
+ return " ".join(lemNote)
181
+
182
+ #function to modify options for displacy NER visualization
183
+ def get_entity_options():
184
+ entities = ["DISEASE", "CHEMICAL", "NEG_ENTITY"]
185
+ colors = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY":'white'}
186
+ options = {"ents": entities, "colors": colors}
187
+ return options
188
+
189
+ #adding a new pipeline component to identify negation
190
+ def neg_model(nlp_model):
191
+ nlp = spacy.load(nlp_model, disable = ['parser'])
192
+ # nlp.add_pipe(nlp.create_pipe('sentencizer'))
193
+ nlp.add_pipe('sentencizer')
194
+ # negex = Negex(nlp)
195
+ nlp.add_pipe(
196
+ "negex",
197
+ config={
198
+ "chunk_prefix": ["no"],
199
+ },
200
+ last=True)
201
+ return nlp
202
+
203
+ def negation_handling(nlp_model, note, neg_model):
204
+ results = []
205
+ nlp = neg_model(nlp_model)
206
+ note = note.split(".") #sentence tokenizing based on delimiter
207
+ note = [n.strip() for n in note] #removing extra spaces at the beginning and end of sentence
208
+ for t in note:
209
+ doc = nlp(t)
210
+ for e in doc.ents:
211
+ rs = str(e._.negex)
212
+ if rs == "True":
213
+ results.append(e.text)
214
+ return results
215
+
216
+ #function to identify span objects of matched negative phrases from text
217
+ def match(nlp,terms,label):
218
+ patterns = [nlp.make_doc(text) for text in terms]
219
+ matcher = PhraseMatcher(nlp.vocab)
220
+ matcher.add(label, None, *patterns)
221
+ return matcher
222
+
223
+ #replacing the labels for identified negative entities
224
+ def overwrite_ent_lbl(matcher, doc):
225
+ matches = matcher(doc)
226
+ seen_tokens = set()
227
+ new_entities = []
228
+ entities = doc.ents
229
+ for match_id, start, end in matches:
230
+ if start not in seen_tokens and end - 1 not in seen_tokens:
231
+ new_entities.append(Span(doc, start, end, label=match_id))
232
+ entities = [e for e in entities if not (e.start < end and e.end > start)]
233
+ seen_tokens.update(range(start, end))
234
+ doc.ents = tuple(entities) + tuple(new_entities)
235
+ return doc
236
+
237
+ #deduplicate repeated entities
238
+ def dedupe(items):
239
+ seen = set()
240
+ for item in items:
241
+ item = str(item).strip()
242
+ if item not in seen:
243
+ yield item
244
+ seen.add(item)
245
+
246
+ lem_clinical_note= lemmatize(runtext, nlp0)
247
+ #creating a doc object using BC5CDR model
248
+ doc = nlp1(lem_clinical_note)
249
+ options = get_entity_options()
250
+
251
+ #list of negative concepts from clinical note identified by negspacy
252
+ results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)
253
+
254
+ matcher = match(nlp1, results0,"NEG_ENTITY")
255
+
256
+ #doc0: new doc object with added "NEG_ENTITY" label
257
+ doc0 = overwrite_ent_lbl(matcher,doc)
258
+
259
+ #visualizing identified Named Entities in clinical input text
260
+ ent_html = displacy.render(doc0, style='ent', options=options)
261
 
262
  ##======================== End of NER Tagging ========================
263