Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -166,97 +166,97 @@ def genEntities(ann, entity):
|
|
166 |
|
167 |
##======================== Start of NER Tagging ========================
|
168 |
# ====== Old NER ======
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
# ====== End of Old NER ======
|
174 |
|
175 |
-
#lemmatizing the notes to capture all forms of negation(e.g., deny: denies, denying)
|
176 |
-
def lemmatize(note, nlp):
|
177 |
-
doc = nlp(note)
|
178 |
-
lemNote = [wd.lemma_ for wd in doc]
|
179 |
-
return " ".join(lemNote)
|
180 |
-
|
181 |
-
#function to modify options for displacy NER visualization
|
182 |
-
def get_entity_options():
|
183 |
-
entities = ["DISEASE", "CHEMICAL", "NEG_ENTITY"]
|
184 |
-
colors = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY":'white'}
|
185 |
-
options = {"ents": entities, "colors": colors}
|
186 |
-
return options
|
187 |
-
|
188 |
-
#adding a new pipeline component to identify negation
|
189 |
-
def neg_model(nlp_model):
|
190 |
-
nlp = spacy.load(nlp_model, disable = ['parser'])
|
191 |
-
# nlp.add_pipe(nlp.create_pipe('sentencizer'))
|
192 |
-
nlp.add_pipe('sentencizer')
|
193 |
-
# negex = Negex(nlp)
|
194 |
-
nlp.add_pipe(
|
195 |
-
"negex",
|
196 |
-
config={
|
197 |
-
"chunk_prefix": ["no"],
|
198 |
-
},
|
199 |
-
last=True)
|
200 |
-
return nlp
|
201 |
-
|
202 |
-
def negation_handling(nlp_model, note, neg_model):
|
203 |
-
results = []
|
204 |
-
nlp = neg_model(nlp_model)
|
205 |
-
note = note.split(".") #sentence tokenizing based on delimiter
|
206 |
-
note = [n.strip() for n in note] #removing extra spaces at the beginning and end of sentence
|
207 |
-
for t in note:
|
208 |
-
doc = nlp(t)
|
209 |
-
for e in doc.ents:
|
210 |
-
rs = str(e._.negex)
|
211 |
-
if rs == "True":
|
212 |
-
results.append(e.text)
|
213 |
-
return results
|
214 |
-
|
215 |
-
#function to identify span objects of matched negative phrases from text
|
216 |
-
def match(nlp,terms,label):
|
217 |
-
patterns = [nlp.make_doc(text) for text in terms]
|
218 |
-
matcher = PhraseMatcher(nlp.vocab)
|
219 |
-
matcher.add(label, None, *patterns)
|
220 |
-
return matcher
|
221 |
-
|
222 |
-
#replacing the labels for identified negative entities
|
223 |
-
def overwrite_ent_lbl(matcher, doc):
|
224 |
-
matches = matcher(doc)
|
225 |
-
seen_tokens = set()
|
226 |
-
new_entities = []
|
227 |
-
entities = doc.ents
|
228 |
-
for match_id, start, end in matches:
|
229 |
-
if start not in seen_tokens and end - 1 not in seen_tokens:
|
230 |
-
new_entities.append(Span(doc, start, end, label=match_id))
|
231 |
-
entities = [e for e in entities if not (e.start < end and e.end > start)]
|
232 |
-
seen_tokens.update(range(start, end))
|
233 |
-
doc.ents = tuple(entities) + tuple(new_entities)
|
234 |
-
return doc
|
235 |
-
|
236 |
-
#deduplicate repeated entities
|
237 |
-
def dedupe(items):
|
238 |
-
seen = set()
|
239 |
-
for item in items:
|
240 |
-
item = str(item).strip()
|
241 |
-
if item not in seen:
|
242 |
-
yield item
|
243 |
-
seen.add(item)
|
244 |
-
|
245 |
-
lem_clinical_note= lemmatize(runtext, nlp0)
|
246 |
-
#creating a doc object using BC5CDR model
|
247 |
-
doc = nlp1(lem_clinical_note)
|
248 |
-
options = get_entity_options()
|
249 |
-
|
250 |
-
#list of negative concepts from clinical note identified by negspacy
|
251 |
-
results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)
|
252 |
-
|
253 |
-
matcher = match(nlp1, results0,"NEG_ENTITY")
|
254 |
-
|
255 |
-
#doc0: new doc object with added "NEG_ENTITY" label
|
256 |
-
doc0 = overwrite_ent_lbl(matcher,doc)
|
257 |
-
|
258 |
-
#visualizing identified Named Entities in clinical input text
|
259 |
-
ent_html = displacy.render(doc0, style='ent', options=options)
|
260 |
|
261 |
##======================== End of NER Tagging ========================
|
262 |
|
|
|
166 |
|
167 |
##======================== Start of NER Tagging ========================
|
168 |
# ====== Old NER ======
|
169 |
+
doc = nlp(str(original_text2))
|
170 |
+
colors = { "DISEASE": "pink","CHEMICAL": "orange"}
|
171 |
+
options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors}
|
172 |
+
ent_html = displacy.render(doc, style="ent", options=options)
|
173 |
# ====== End of Old NER ======
|
174 |
|
175 |
+
# #lemmatizing the notes to capture all forms of negation(e.g., deny: denies, denying)
|
176 |
+
# def lemmatize(note, nlp):
|
177 |
+
# doc = nlp(note)
|
178 |
+
# lemNote = [wd.lemma_ for wd in doc]
|
179 |
+
# return " ".join(lemNote)
|
180 |
+
|
181 |
+
# #function to modify options for displacy NER visualization
|
182 |
+
# def get_entity_options():
|
183 |
+
# entities = ["DISEASE", "CHEMICAL", "NEG_ENTITY"]
|
184 |
+
# colors = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY":'white'}
|
185 |
+
# options = {"ents": entities, "colors": colors}
|
186 |
+
# return options
|
187 |
+
|
188 |
+
# #adding a new pipeline component to identify negation
|
189 |
+
# def neg_model(nlp_model):
|
190 |
+
# nlp = spacy.load(nlp_model, disable = ['parser'])
|
191 |
+
# # nlp.add_pipe(nlp.create_pipe('sentencizer'))
|
192 |
+
# nlp.add_pipe('sentencizer')
|
193 |
+
# # negex = Negex(nlp)
|
194 |
+
# nlp.add_pipe(
|
195 |
+
# "negex",
|
196 |
+
# config={
|
197 |
+
# "chunk_prefix": ["no"],
|
198 |
+
# },
|
199 |
+
# last=True)
|
200 |
+
# return nlp
|
201 |
+
|
202 |
+
# def negation_handling(nlp_model, note, neg_model):
|
203 |
+
# results = []
|
204 |
+
# nlp = neg_model(nlp_model)
|
205 |
+
# note = note.split(".") #sentence tokenizing based on delimiter
|
206 |
+
# note = [n.strip() for n in note] #removing extra spaces at the beginning and end of sentence
|
207 |
+
# for t in note:
|
208 |
+
# doc = nlp(t)
|
209 |
+
# for e in doc.ents:
|
210 |
+
# rs = str(e._.negex)
|
211 |
+
# if rs == "True":
|
212 |
+
# results.append(e.text)
|
213 |
+
# return results
|
214 |
+
|
215 |
+
# #function to identify span objects of matched negative phrases from text
|
216 |
+
# def match(nlp,terms,label):
|
217 |
+
# patterns = [nlp.make_doc(text) for text in terms]
|
218 |
+
# matcher = PhraseMatcher(nlp.vocab)
|
219 |
+
# matcher.add(label, None, *patterns)
|
220 |
+
# return matcher
|
221 |
+
|
222 |
+
# #replacing the labels for identified negative entities
|
223 |
+
# def overwrite_ent_lbl(matcher, doc):
|
224 |
+
# matches = matcher(doc)
|
225 |
+
# seen_tokens = set()
|
226 |
+
# new_entities = []
|
227 |
+
# entities = doc.ents
|
228 |
+
# for match_id, start, end in matches:
|
229 |
+
# if start not in seen_tokens and end - 1 not in seen_tokens:
|
230 |
+
# new_entities.append(Span(doc, start, end, label=match_id))
|
231 |
+
# entities = [e for e in entities if not (e.start < end and e.end > start)]
|
232 |
+
# seen_tokens.update(range(start, end))
|
233 |
+
# doc.ents = tuple(entities) + tuple(new_entities)
|
234 |
+
# return doc
|
235 |
+
|
236 |
+
# #deduplicate repeated entities
|
237 |
+
# def dedupe(items):
|
238 |
+
# seen = set()
|
239 |
+
# for item in items:
|
240 |
+
# item = str(item).strip()
|
241 |
+
# if item not in seen:
|
242 |
+
# yield item
|
243 |
+
# seen.add(item)
|
244 |
+
|
245 |
+
# lem_clinical_note= lemmatize(runtext, nlp0)
|
246 |
+
# #creating a doc object using BC5CDR model
|
247 |
+
# doc = nlp1(lem_clinical_note)
|
248 |
+
# options = get_entity_options()
|
249 |
+
|
250 |
+
# #list of negative concepts from clinical note identified by negspacy
|
251 |
+
# results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)
|
252 |
+
|
253 |
+
# matcher = match(nlp1, results0,"NEG_ENTITY")
|
254 |
+
|
255 |
+
# #doc0: new doc object with added "NEG_ENTITY" label
|
256 |
+
# doc0 = overwrite_ent_lbl(matcher,doc)
|
257 |
+
|
258 |
+
# #visualizing identified Named Entities in clinical input text
|
259 |
+
# ent_html = displacy.render(doc0, style='ent', options=options)
|
260 |
|
261 |
##======================== End of NER Tagging ========================
|
262 |
|