Update app.py
app.py
CHANGED
@@ -167,97 +167,97 @@ def genEntities(ann, entity):
 
 ##======================== Start of NER Tagging ========================
 # ====== Old NER ======
-doc = nlp(str(original_text2))
-colors = { "DISEASE": "pink","CHEMICAL": "orange"}
-options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors}
-ent_html = displacy.render(doc, style="ent", options=options)
+# doc = nlp(str(original_text2))
+# colors = { "DISEASE": "pink","CHEMICAL": "orange"}
+# options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors}
+# ent_html = displacy.render(doc, style="ent", options=options)
 # ====== End of Old NER ======
 
-[lines 176-260 of the previous version are not legible in this view]
+#lemmatizing the notes to capture all forms of negation (e.g., deny: denies, denying)
+def lemmatize(note, nlp):
+    doc = nlp(note)
+    lemNote = [wd.lemma_ for wd in doc]
+    return " ".join(lemNote)
+
+#function to modify options for displacy NER visualization
+def get_entity_options():
+    entities = ["DISEASE", "CHEMICAL", "NEG_ENTITY"]
+    colors = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY": 'white'}
+    options = {"ents": entities, "colors": colors}
+    return options
+
+#adding a new pipeline component to identify negation
+def neg_model(nlp_model):
+    nlp = spacy.load(nlp_model, disable=['parser'])
+    # nlp.add_pipe(nlp.create_pipe('sentencizer'))
+    nlp.add_pipe('sentencizer')
+    # negex = Negex(nlp)
+    nlp.add_pipe(
+        "negex",
+        config={
+            "chunk_prefix": ["no"],
+        },
+        last=True)
+    return nlp
+
+def negation_handling(nlp_model, note, neg_model):
+    results = []
+    nlp = neg_model(nlp_model)
+    note = note.split(".") #sentence tokenizing based on delimiter
+    note = [n.strip() for n in note] #removing extra spaces at the beginning and end of each sentence
+    for t in note:
+        doc = nlp(t)
+        for e in doc.ents:
+            rs = str(e._.negex)
+            if rs == "True":
+                results.append(e.text)
+    return results
+
+#function to identify span objects of matched negative phrases from text
+def match(nlp, terms, label):
+    patterns = [nlp.make_doc(text) for text in terms]
+    matcher = PhraseMatcher(nlp.vocab)
+    matcher.add(label, None, *patterns)
+    return matcher
+
+#replacing the labels for identified negative entities
+def overwrite_ent_lbl(matcher, doc):
+    matches = matcher(doc)
+    seen_tokens = set()
+    new_entities = []
+    entities = doc.ents
+    for match_id, start, end in matches:
+        if start not in seen_tokens and end - 1 not in seen_tokens:
+            new_entities.append(Span(doc, start, end, label=match_id))
+            entities = [e for e in entities if not (e.start < end and e.end > start)]
+            seen_tokens.update(range(start, end))
+    doc.ents = tuple(entities) + tuple(new_entities)
+    return doc
+
+#deduplicate repeated entities
+def dedupe(items):
+    seen = set()
+    for item in items:
+        item = str(item).strip()
+        if item not in seen:
+            yield item
+            seen.add(item)
+
+lem_clinical_note = lemmatize(runtext, nlp0)
+#creating a doc object using the BC5CDR model
+doc = nlp1(lem_clinical_note)
+options = get_entity_options()
+
+#list of negative concepts from the clinical note identified by negspacy
+results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)
+
+matcher = match(nlp1, results0, "NEG_ENTITY")
+
+#doc0: new doc object with the added "NEG_ENTITY" label
+doc0 = overwrite_ent_lbl(matcher, doc)
+
+#visualizing identified Named Entities in the clinical input text
+ent_html = displacy.render(doc0, style='ent', options=options)
 
 ##======================== End of NER Tagging ========================
 
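For reference, a minimal standalone sketch (not part of this commit) of the negspacy pattern the new code builds on. It assumes spaCy v3, negspacy, and the scispaCy model en_ner_bc5cdr_md are installed; the sample sentence is illustrative only.

import spacy
from negspacy.negation import Negex  # importing registers the "negex" pipeline factory

nlp = spacy.load("en_ner_bc5cdr_md", disable=["parser"])
nlp.add_pipe("sentencizer")
nlp.add_pipe("negex", config={"chunk_prefix": ["no"]}, last=True)

doc = nlp("Patient denies chest pain and nausea. No fever. Continue aspirin.")
for ent in doc.ents:
    # ent._.negex is True when negspacy marks the entity as negated
    print(ent.text, ent.label_, ent._.negex)

Entities flagged this way are what negation_handling collects and then relabels as NEG_ENTITY via the PhraseMatcher in overwrite_ent_lbl before rendering with displacy.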