carisackc committed on
Commit
1fbffbd
·
1 Parent(s): 13f4434

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +89 -89
app.py CHANGED
@@ -166,97 +166,97 @@ def genEntities(ann, entity):
166
 
167
  ##======================== Start of NER Tagging ========================
168
  # ====== Old NER ======
169
- #doc = nlp(str(original_text2))
170
- #colors = { "DISEASE": "pink","CHEMICAL": "orange"}
171
- #options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors}
172
- #ent_html = displacy.render(doc, style="ent", options=options)
173
  # ====== End of Old NER ======
174
 
175
- #lemmatizing the notes to capture all forms of negation(e.g., deny: denies, denying)
176
- def lemmatize(note, nlp):
177
- doc = nlp(note)
178
- lemNote = [wd.lemma_ for wd in doc]
179
- return " ".join(lemNote)
180
-
181
- #function to modify options for displacy NER visualization
182
- def get_entity_options():
183
- entities = ["DISEASE", "CHEMICAL", "NEG_ENTITY"]
184
- colors = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY":'white'}
185
- options = {"ents": entities, "colors": colors}
186
- return options
187
-
188
- #adding a new pipeline component to identify negation
189
- def neg_model(nlp_model):
190
- nlp = spacy.load(nlp_model, disable = ['parser'])
191
- # nlp.add_pipe(nlp.create_pipe('sentencizer'))
192
- nlp.add_pipe('sentencizer')
193
- # negex = Negex(nlp)
194
- nlp.add_pipe(
195
- "negex",
196
- config={
197
- "chunk_prefix": ["no"],
198
- },
199
- last=True)
200
- return nlp
201
-
202
- def negation_handling(nlp_model, note, neg_model):
203
- results = []
204
- nlp = neg_model(nlp_model)
205
- note = note.split(".") #sentence tokenizing based on delimiter
206
- note = [n.strip() for n in note] #removing extra spaces at the beginning and end of sentence
207
- for t in note:
208
- doc = nlp(t)
209
- for e in doc.ents:
210
- rs = str(e._.negex)
211
- if rs == "True":
212
- results.append(e.text)
213
- return results
214
-
215
- #function to identify span objects of matched negative phrases from text
216
- def match(nlp,terms,label):
217
- patterns = [nlp.make_doc(text) for text in terms]
218
- matcher = PhraseMatcher(nlp.vocab)
219
- matcher.add(label, None, *patterns)
220
- return matcher
221
-
222
- #replacing the labels for identified negative entities
223
- def overwrite_ent_lbl(matcher, doc):
224
- matches = matcher(doc)
225
- seen_tokens = set()
226
- new_entities = []
227
- entities = doc.ents
228
- for match_id, start, end in matches:
229
- if start not in seen_tokens and end - 1 not in seen_tokens:
230
- new_entities.append(Span(doc, start, end, label=match_id))
231
- entities = [e for e in entities if not (e.start < end and e.end > start)]
232
- seen_tokens.update(range(start, end))
233
- doc.ents = tuple(entities) + tuple(new_entities)
234
- return doc
235
-
236
- #deduplicate repeated entities
237
- def dedupe(items):
238
- seen = set()
239
- for item in items:
240
- item = str(item).strip()
241
- if item not in seen:
242
- yield item
243
- seen.add(item)
244
-
245
- lem_clinical_note= lemmatize(runtext, nlp0)
246
- #creating a doc object using BC5CDR model
247
- doc = nlp1(lem_clinical_note)
248
- options = get_entity_options()
249
-
250
- #list of negative concepts from clinical note identified by negspacy
251
- results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)
252
-
253
- matcher = match(nlp1, results0,"NEG_ENTITY")
254
-
255
- #doc0: new doc object with added "NEG_ENTITY label"
256
- doc0 = overwrite_ent_lbl(matcher,doc)
257
-
258
- #visualizing identified Named Entities in clinical input text
259
- ent_html = displacy.render(doc0, style='ent', options=options)
260
 
261
  ##======================== End of NER Tagging ========================
262
 
 
166
 
167
  ##======================== Start of NER Tagging ========================
168
  # ====== Old NER ======
169
+ doc = nlp(str(original_text2))
170
+ colors = { "DISEASE": "pink","CHEMICAL": "orange"}
171
+ options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors}
172
+ ent_html = displacy.render(doc, style="ent", options=options)
173
  # ====== End of Old NER ======
174
 
175
+ # #lemmatizing the notes to capture all forms of negation(e.g., deny: denies, denying)
176
+ # def lemmatize(note, nlp):
177
+ # doc = nlp(note)
178
+ # lemNote = [wd.lemma_ for wd in doc]
179
+ # return " ".join(lemNote)
180
+
181
+ # #function to modify options for displacy NER visualization
182
+ # def get_entity_options():
183
+ # entities = ["DISEASE", "CHEMICAL", "NEG_ENTITY"]
184
+ # colors = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY":'white'}
185
+ # options = {"ents": entities, "colors": colors}
186
+ # return options
187
+
188
+ # #adding a new pipeline component to identify negation
189
+ # def neg_model(nlp_model):
190
+ # nlp = spacy.load(nlp_model, disable = ['parser'])
191
+ # # nlp.add_pipe(nlp.create_pipe('sentencizer'))
192
+ # nlp.add_pipe('sentencizer')
193
+ # # negex = Negex(nlp)
194
+ # nlp.add_pipe(
195
+ # "negex",
196
+ # config={
197
+ # "chunk_prefix": ["no"],
198
+ # },
199
+ # last=True)
200
+ # return nlp
201
+
202
+ # def negation_handling(nlp_model, note, neg_model):
203
+ # results = []
204
+ # nlp = neg_model(nlp_model)
205
+ # note = note.split(".") #sentence tokenizing based on delimiter
206
+ # note = [n.strip() for n in note] #removing extra spaces at the beginning and end of sentence
207
+ # for t in note:
208
+ # doc = nlp(t)
209
+ # for e in doc.ents:
210
+ # rs = str(e._.negex)
211
+ # if rs == "True":
212
+ # results.append(e.text)
213
+ # return results
214
+
215
+ # #function to identify span objects of matched negative phrases from text
216
+ # def match(nlp,terms,label):
217
+ # patterns = [nlp.make_doc(text) for text in terms]
218
+ # matcher = PhraseMatcher(nlp.vocab)
219
+ # matcher.add(label, None, *patterns)
220
+ # return matcher
221
+
222
+ # #replacing the labels for identified negative entities
223
+ # def overwrite_ent_lbl(matcher, doc):
224
+ # matches = matcher(doc)
225
+ # seen_tokens = set()
226
+ # new_entities = []
227
+ # entities = doc.ents
228
+ # for match_id, start, end in matches:
229
+ # if start not in seen_tokens and end - 1 not in seen_tokens:
230
+ # new_entities.append(Span(doc, start, end, label=match_id))
231
+ # entities = [e for e in entities if not (e.start < end and e.end > start)]
232
+ # seen_tokens.update(range(start, end))
233
+ # doc.ents = tuple(entities) + tuple(new_entities)
234
+ # return doc
235
+
236
+ # #deduplicate repeated entities
237
+ # def dedupe(items):
238
+ # seen = set()
239
+ # for item in items:
240
+ # item = str(item).strip()
241
+ # if item not in seen:
242
+ # yield item
243
+ # seen.add(item)
244
+
245
+ # lem_clinical_note= lemmatize(runtext, nlp0)
246
+ # #creating a doc object using BC5CDR model
247
+ # doc = nlp1(lem_clinical_note)
248
+ # options = get_entity_options()
249
+
250
+ # #list of negative concepts from clinical note identified by negspacy
251
+ # results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)
252
+
253
+ # matcher = match(nlp1, results0,"NEG_ENTITY")
254
+
255
+ # #doc0: new doc object with added "NEG_ENTITY label"
256
+ # doc0 = overwrite_ent_lbl(matcher,doc)
257
+
258
+ # #visualizing identified Named Entities in clinical input text
259
+ # ent_html = displacy.render(doc0, style='ent', options=options)
260
 
261
  ##======================== End of NER Tagging ========================
262