Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -23,7 +23,9 @@ if "visibility" not in st.session_state:
|
|
23 |
st.session_state.disabled = False
|
24 |
|
25 |
#nlp = en_core_web_lg.load()
|
26 |
-
nlp = spacy.load("en_ner_bc5cdr_md")
|
|
|
|
|
27 |
|
28 |
st.set_page_config(page_title ='Clinical Note Summarization',
|
29 |
#page_icon= "Notes",
|
@@ -150,7 +152,95 @@ def genEntities(ann, entity):
|
|
150 |
entlist = ",".join(ent)
|
151 |
st.markdown(f'<p style="background-color:{ent_col[entity]};color:#080808;font-size:16px;">{entlist}</p>', unsafe_allow_html=True)
|
152 |
|
153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
def visualize (run_text,output):
|
155 |
text =''
|
156 |
splitruntext = [x for x in runtext.split('.')]
|
@@ -185,6 +275,7 @@ def run_model(input_text):
|
|
185 |
st.write('Summary')
|
186 |
|
187 |
|
|
|
188 |
st.success(output)
|
189 |
|
190 |
doc = nlp(str(original_text2))
|
|
|
23 |
st.session_state.disabled = False
|
24 |
|
25 |
#nlp = en_core_web_lg.load()
|
26 |
+
#nlp = spacy.load("en_ner_bc5cdr_md")
|
27 |
+
nlp0 = spacy.load("en_core_sci_sm")
|
28 |
+
nlp1 = spacy.load("en_ner_bc5cdr_md")
|
29 |
|
30 |
st.set_page_config(page_title ='Clinical Note Summarization',
|
31 |
#page_icon= "Notes",
|
|
|
152 |
entlist = ",".join(ent)
|
153 |
st.markdown(f'<p style="background-color:{ent_col[entity]};color:#080808;font-size:16px;">{entlist}</p>', unsafe_allow_html=True)
|
154 |
|
155 |
+
# Lemmatize the notes to capture all forms of negation (e.g., deny: denies, denying)
|
156 |
+
def lemmatize(note, nlp):
    """Return ``note`` with every token replaced by its lemma.

    ``note`` is passed through the ``nlp`` callable and the ``lemma_`` of
    each resulting token is joined with single spaces.
    """
    return " ".join(token.lemma_ for token in nlp(note))
|
160 |
+
|
161 |
+
#function to modify options for displacy NER visualization
|
162 |
+
def get_entity_options():
    """Build the options dict passed to the displaCy entity visualizer.

    Returns a dict with the entity labels to show (``"ents"``) and the
    background color assigned to each label (``"colors"``).
    """
    return {
        "ents": ["DISEASE", "CHEMICAL", "NEG_ENTITY"],
        "colors": {
            "DISEASE": "pink",
            "CHEMICAL": "orange",
            "NEG_ENTITY": "white",
        },
    }
|
167 |
+
|
168 |
+
#adding a new pipeline component to identify negation
|
169 |
+
def neg_model(nlp_model):
    """Load ``nlp_model`` and extend it with sentence splitting and negation tagging.

    The model is loaded with its ``parser`` component disabled. A
    ``sentencizer`` pipe is then added, followed by a ``negex`` pipe that is
    placed last and configured with ``chunk_prefix=["no"]``.

    Returns the resulting pipeline object.
    """
    negex_config = {"chunk_prefix": ["no"]}
    pipeline = spacy.load(nlp_model, disable=["parser"])
    pipeline.add_pipe("sentencizer")
    pipeline.add_pipe("negex", config=negex_config, last=True)
    return pipeline
|
181 |
+
"""
|
182 |
+
Negspacy sets a new attribute e._.negex to True if a negative concept is encountered
|
183 |
+
"""
|
184 |
+
def negation_handling(nlp_model, note, neg_model):
    """Collect the text of every entity in ``note`` that is flagged as negated.

    Args:
        nlp_model: Model identifier forwarded unchanged to ``neg_model``.
        note: Note text; it is split into sentences on the ``"."`` delimiter.
        neg_model: Callable taking ``nlp_model`` and returning a pipeline
            whose entities expose a ``_.negex`` flag.

    Returns:
        A list with the ``text`` of each entity whose ``_.negex`` flag is
        truthy, in the order encountered (duplicates are kept).
    """
    nlp = neg_model(nlp_model)
    # Naive sentence split on the period delimiter. Fragments are trimmed and
    # empty ones (e.g. after a trailing ".") are dropped so they are not sent
    # through the pipeline for nothing.
    stripped = (part.strip() for part in note.split("."))
    sentences = [sentence for sentence in stripped if sentence]

    results = []
    for sentence in sentences:
        # Test the flag directly instead of comparing its string form.
        results.extend(e.text for e in nlp(sentence).ents if e._.negex)
    return results
|
196 |
+
|
197 |
+
#function to identify span objects of matched negative phrases from text
|
198 |
+
def match(nlp, terms, label):
    """Build a ``PhraseMatcher`` that tags every phrase in ``terms`` with ``label``.

    Args:
        nlp: Pipeline providing ``make_doc`` and ``vocab``.
        terms: Iterable of phrase strings to match.
        label: Match key under which all phrases are registered.

    Returns:
        The configured ``PhraseMatcher``.
    """
    matcher = PhraseMatcher(nlp.vocab)
    patterns = [nlp.make_doc(term) for term in terms]
    # Pass the patterns as a single list. The positional
    # ``add(label, None, *patterns)`` form is the legacy spaCy v2 signature,
    # while the rest of this file uses the v3 API.
    matcher.add(label, patterns)
    return matcher
|
203 |
+
|
204 |
+
#replacing the labels for identified negative entities
|
205 |
+
def overwrite_ent_lbl(matcher, doc):
    """Relabel the spans of ``doc`` found by ``matcher`` and return ``doc``.

    Every accepted match becomes a new entity span labelled with the match
    id. Existing entities that overlap an accepted match are dropped, and the
    remaining original entities plus the new spans are written back to
    ``doc.ents``.

    Args:
        matcher: Callable returning ``(match_id, start, end)`` triples for ``doc``.
        doc: Document whose ``ents`` are rewritten in place.

    Returns:
        The same ``doc`` object with its ``ents`` updated.
    """
    claimed_tokens = set()
    new_entities = []
    entities = list(doc.ents)
    for match_id, start, end in matcher(doc):
        span_tokens = range(start, end)
        # Reject a match that shares ANY token with an already accepted one.
        # Checking only the first and last token would let through a match
        # that fully encloses an earlier one, yielding overlapping entity
        # spans, which cannot be assigned to ``doc.ents``.
        if not claimed_tokens.isdisjoint(span_tokens):
            continue
        new_entities.append(Span(doc, start, end, label=match_id))
        # Drop original entities that intersect the newly accepted span.
        entities = [e for e in entities if not (e.start < end and e.end > start)]
        claimed_tokens.update(span_tokens)
    doc.ents = tuple(entities) + tuple(new_entities)
    return doc
|
217 |
+
|
218 |
+
#deduplicate repeated entities
|
219 |
+
def dedupe(items):
    """Lazily yield each distinct item of ``items`` once, in first-seen order.

    Items are compared by their stripped string form, and that stripped
    string is what gets yielded.
    """
    emitted = set()
    for raw in items:
        text = str(raw).strip()
        if text in emitted:
            continue
        emitted.add(text)
        yield text
|
226 |
+
|
227 |
+
# Lemmatize the note so negation cues are matched in all inflected forms.
# NOTE(review): `text` is not defined in the visible part of this file;
# confirm it is assigned before this point.
lem_clinical_note = lemmatize(text, nlp0)
# Create a doc object using the BC5CDR model.
doc = nlp1(lem_clinical_note)
options = get_entity_options()

# List of negated concepts in the clinical note, as identified by negspacy.
results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)

matcher = match(nlp1, results0, "NEG_ENTITY")

# doc0: the doc object with the added "NEG_ENTITY" label.
doc0 = overwrite_ent_lbl(matcher, doc)

# Visualize the named entities identified in the clinical input text.
# Outside a notebook, displacy.render only returns the markup, so the result
# has to be handed to Streamlit explicitly or nothing is shown.
ent_html = displacy.render(doc0, style='ent', options=options, jupyter=False)
st.markdown(ent_html, unsafe_allow_html=True)
|
242 |
+
|
243 |
+
##=== end of NER tagger===
|
244 |
def visualize (run_text,output):
|
245 |
text =''
|
246 |
splitruntext = [x for x in runtext.split('.')]
|
|
|
275 |
st.write('Summary')
|
276 |
|
277 |
|
278 |
+
|
279 |
st.success(output)
|
280 |
|
281 |
doc = nlp(str(original_text2))
|