carisackc commited on
Commit
e49ec72
·
1 Parent(s): 4e99338

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -2
app.py CHANGED
@@ -23,7 +23,9 @@ if "visibility" not in st.session_state:
23
  st.session_state.disabled = False
24
 
25
  #nlp = en_core_web_lg.load()
26
- nlp = spacy.load("en_ner_bc5cdr_md")
 
 
27
 
28
  st.set_page_config(page_title ='Clinical Note Summarization',
29
  #page_icon= "Notes",
@@ -150,7 +152,95 @@ def genEntities(ann, entity):
150
  entlist = ",".join(ent)
151
  st.markdown(f'<p style="background-color:{ent_col[entity]};color:#080808;font-size:16px;">{entlist}</p>', unsafe_allow_html=True)
152
 
153
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  def visualize (run_text,output):
155
  text =''
156
  splitruntext = [x for x in runtext.split('.')]
@@ -185,6 +275,7 @@ def run_model(input_text):
185
  st.write('Summary')
186
 
187
 
 
188
  st.success(output)
189
 
190
  doc = nlp(str(original_text2))
 
23
  st.session_state.disabled = False
24
 
25
  #nlp = en_core_web_lg.load()
26
# Two scispaCy pipelines are loaded once at import time:
#   nlp0 - used below to lemmatize the clinical note
#   nlp1 - BC5CDR NER model used to build the entity doc
nlp0 = spacy.load("en_core_sci_sm")
nlp1 = spacy.load("en_ner_bc5cdr_md")
# Keep the original name bound: run_model still calls nlp(...), and it used to
# refer to this same BC5CDR model. Aliasing avoids loading the model twice.
nlp = nlp1
29
 
30
  st.set_page_config(page_title ='Clinical Note Summarization',
31
  #page_icon= "Notes",
 
152
  entlist = ",".join(ent)
153
  st.markdown(f'<p style="background-color:{ent_col[entity]};color:#080808;font-size:16px;">{entlist}</p>', unsafe_allow_html=True)
154
 
155
+ #lemmatizing the notes to capture all forms of negation(e.g., deny: denies, denying)
156
def lemmatize(note, nlp):
    """Return ``note`` with every token replaced by its lemma.

    Lemmatizing up front means later negation detection sees a single form of
    each cue (e.g. "denies" and "denying" both become "deny").

    Args:
        note: Text to process.
        nlp: Callable that turns text into an iterable of tokens exposing
            a ``lemma_`` attribute.

    Returns:
        The lemmas joined by single spaces.
    """
    return " ".join(token.lemma_ for token in nlp(note))
160
+
161
+ #function to modify options for displacy NER visualization
162
def get_entity_options():
    """Build the options dict passed to the displaCy entity visualizer.

    Returns:
        A dict with two keys: ``"ents"`` (the entity labels to show) and
        ``"colors"`` (label -> colour name).
    """
    # Insertion order of the palette doubles as the label order.
    palette = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY": 'white'}
    return {"ents": list(palette), "colors": palette}
167
+
168
+ #adding a new pipeline component to identify negation
169
def neg_model(nlp_model):
    """Load a spaCy model and extend it with negation detection.

    The model is loaded without its parser; a ``sentencizer`` is added, and
    a ``negex`` component is appended at the end of the pipeline.

    Args:
        nlp_model: Name of the spaCy model to load.

    Returns:
        The configured pipeline.
    """
    pipeline = spacy.load(nlp_model, disable=['parser'])
    pipeline.add_pipe('sentencizer')
    negex_settings = {"chunk_prefix": ["no"]}
    pipeline.add_pipe("negex", config=negex_settings, last=True)
    return pipeline
181
+ """
182
+ Negspacy sets a new attribute e._.negex to True if a negative concept is encountered
183
+ """
184
def negation_handling(nlp_model, note, neg_model):
    """Collect the text of every entity flagged as negated in ``note``.

    negspacy sets ``ent._.negex`` to True when an entity occurs in a negated
    context; those entities' texts are gathered here.

    Args:
        nlp_model: Model name forwarded to ``neg_model``.
        note: Clinical note text. It is split on "." and each fragment is
            processed separately.
        neg_model: Callable that builds the negation-aware pipeline from
            ``nlp_model``.

    Returns:
        List of negated entity texts in order of appearance (duplicates kept).
    """
    nlp = neg_model(nlp_model)
    results = []
    # Sentence tokenizing based on the "." delimiter.
    for sentence in note.split("."):
        sentence = sentence.strip()
        if not sentence:
            # A trailing or doubled "." leaves empty fragments with nothing
            # to analyse; skip them instead of running the pipeline.
            continue
        # Test the boolean flag directly rather than via its string form.
        results.extend(ent.text for ent in nlp(sentence).ents if ent._.negex)
    return results
196
+
197
+ #function to identify span objects of matched negative phrases from text
198
def match(nlp, terms, label):
    """Build a PhraseMatcher that finds ``terms`` under a single label.

    Args:
        nlp: Pipeline supplying ``make_doc`` (to tokenize each term) and
            ``vocab`` (shared with the matcher).
        terms: Iterable of phrases to match.
        label: Match key registered for all of the phrases.

    Returns:
        The configured PhraseMatcher.
    """
    patterns = [nlp.make_doc(term) for term in terms]
    matcher = PhraseMatcher(nlp.vocab)
    # spaCy v3 signature: the patterns go in as one list. The older
    # add(key, None, *patterns) form is the legacy v2 calling convention.
    matcher.add(label, patterns)
    return matcher
203
+
204
+ #replacing the labels for identified negative entities
205
def overwrite_ent_lbl(matcher, doc):
    """Relabel entities of ``doc`` that coincide with matcher hits.

    Each accepted match becomes a new span labelled with the match id, and
    any existing entity overlapping it is dropped. A match is ignored when
    its first or last token is already covered by an earlier accepted match.

    Args:
        matcher: Callable returning ``(match_id, start, end)`` triples for
            ``doc``.
        doc: Document whose ``ents`` are rewritten in place.

    Returns:
        The same ``doc`` object, with updated ``ents``.
    """
    hits = matcher(doc)
    covered = set()
    relabelled = []
    kept = doc.ents
    for match_id, start, end in hits:
        if start in covered or (end - 1) in covered:
            continue
        relabelled.append(Span(doc, start, end, label=match_id))
        # Keep only the entities lying entirely before or after the match.
        kept = [ent for ent in kept if ent.start >= end or ent.end <= start]
        covered.update(range(start, end))
    doc.ents = tuple(kept) + tuple(relabelled)
    return doc
217
+
218
+ #deduplicate repeated entities
219
def dedupe(items):
    """Yield each distinct item once, in first-seen order.

    Items are compared after being converted to ``str`` and stripped of
    surrounding whitespace; the normalized string is what gets yielded.
    """
    emitted = set()
    for raw in items:
        key = str(raw).strip()
        if key in emitted:
            continue
        emitted.add(key)
        yield key
226
+
227
# Lemmatize the note first so negation cues appear in a single form.
lem_clinical_note = lemmatize(text, nlp0)
# Doc object created with the BC5CDR model
doc = nlp1(lem_clinical_note)
options = get_entity_options()

# List of negated concepts in the clinical note, as identified by negspacy
results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)

matcher = match(nlp1, results0, "NEG_ENTITY")

# doc0: the same doc object, with matched entities relabelled "NEG_ENTITY"
doc0 = overwrite_ent_lbl(matcher, doc)

# Visualize the named entities found in the clinical input text.
# Outside a notebook displacy.render only returns the markup, so the result
# must be handed to Streamlit explicitly or nothing is displayed.
ent_html = displacy.render(doc0, style='ent', options=options, jupyter=False)
st.markdown(ent_html, unsafe_allow_html=True)
242
+
243
+ ##=== end of NER tagger===
244
  def visualize (run_text,output):
245
  text =''
246
  splitruntext = [x for x in runtext.split('.')]
 
275
  st.write('Summary')
276
 
277
 
278
+
279
  st.success(output)
280
 
281
  doc = nlp(str(original_text2))