Spaces:

carisackc
/

Clinical

Build error

App Files Files Community

carisackc committed on Dec 7, 2022

Commit

a2c3ee0

1 Parent(s): 9941c35

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -89

app.py CHANGED Viewed

@@ -167,97 +167,97 @@ def genEntities(ann, entity):
 ##======================== Start of NER Tagging ========================
 # ====== Old NER ======
-doc = nlp(str(original_text2))  # old NER path: run the pipeline on the string form of original_text2
-colors = { "DISEASE": "pink","CHEMICAL": "orange"}  # highlight color per entity label
-options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors}  # only these two labels are rendered
-ent_html = displacy.render(doc, style="ent", options=options)  # entity-style displaCy markup for the note
 # ====== End of Old NER ======
-# #lemmatizing the notes to capture all forms of negation(e.g., deny: denies, denying)
-# def lemmatize(note, nlp):
-    # doc = nlp(note)
-    # lemNote = [wd.lemma_ for wd in doc]
-    # return " ".join(lemNote)
-# #function to modify options for displacy NER visualization
-# def get_entity_options():
-    # entities = ["DISEASE", "CHEMICAL", "NEG_ENTITY"]
-    # colors = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY":'white'}
-    # options = {"ents": entities, "colors": colors}
-    # return options
-# #adding a new pipeline component to identify negation
-# def neg_model(nlp_model):
-    # nlp = spacy.load(nlp_model, disable = ['parser'])
-# #     nlp.add_pipe(nlp.create_pipe('sentencizer'))
-    # nlp.add_pipe('sentencizer')
-# #     negex = Negex(nlp)
-    # nlp.add_pipe(
-    # "negex",
-    # config={
-        # "chunk_prefix": ["no"],
-    # },
-    # last=True)
-    # return nlp
-# def negation_handling(nlp_model, note, neg_model):
-    # results = []
-    # nlp = neg_model(nlp_model)
-    # note = note.split(".") #sentence tokenizing based on delimiter
-    # note = [n.strip() for n in note] #removing extra spaces at the beginning and end of sentence
-    # for t in note:
-        # doc = nlp(t)
-        # for e in doc.ents:
-            # rs = str(e._.negex)
-            # if rs == "True":
-                # results.append(e.text)
-    # return results
-# #function to identify span objects of matched negative phrases from text
-# def match(nlp,terms,label):
-    # patterns = [nlp.make_doc(text) for text in terms]
-    # matcher = PhraseMatcher(nlp.vocab)
-    # matcher.add(label, None, *patterns)
-    # return matcher
-# #replacing the labels for identified negative entities
-# def overwrite_ent_lbl(matcher, doc):
-    # matches = matcher(doc)
-    # seen_tokens = set()
-    # new_entities = []
-    # entities = doc.ents
-    # for match_id, start, end in matches:
-        # if start not in seen_tokens and end - 1 not in seen_tokens:
-            # new_entities.append(Span(doc, start, end, label=match_id))
-            # entities = [e for e in entities if not (e.start < end and e.end > start)]
-            # seen_tokens.update(range(start, end))
-    # doc.ents = tuple(entities) + tuple(new_entities)
-    # return doc
-# #deduplicate repeated entities
-# def dedupe(items):
-    # seen = set()
-    # for item in items:
-        # item = str(item).strip()
-        # if item not in seen:
-            # yield item
-            # seen.add(item)
-# lem_clinical_note= lemmatize(runtext, nlp0)
-# #creating a doc object using BC5CDR model
-# doc = nlp1(lem_clinical_note)
-# options = get_entity_options()
-# #list of negative concepts from clinical note identified by negspacy
-# results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)
-# matcher = match(nlp1, results0,"NEG_ENTITY")
-# #doc0: new doc object with added "NEG_ENTITY label"
-# doc0 = overwrite_ent_lbl(matcher,doc)
-# #visualizing identified Named Entities in clinical input text
-# ent_html = displacy.render(doc0, style='ent', options=options)
 ##======================== End of NER Tagging ========================

 ##======================== Start of NER Tagging ========================
 # ====== Old NER ======
+# doc = nlp(str(original_text2))
+# colors = { "DISEASE": "pink","CHEMICAL": "orange"}
+# options = {"ents": [ "DISEASE", "CHEMICAL"],"colors": colors}
+# ent_html = displacy.render(doc, style="ent", options=options)
 # ====== End of Old NER ======
+# Lemmatize the note so that all forms of a negation word are captured (e.g., deny: denies, denying); returns the lemmas joined by single spaces.
+def lemmatize(note, nlp):
+    doc = nlp(note)  # run the supplied pipeline over the raw note text (presumably a spaCy pipeline - confirm)
+    lemNote = [wd.lemma_ for wd in doc]  # one lemma string per item yielded by doc
+    return " ".join(lemNote)  # lemmas are re-joined with single spaces, so the original spacing is not preserved
+# Build the options dict for the displaCy NER visualization: which entity labels to show and the color used for each.
+def get_entity_options():
+    entities = ["DISEASE", "CHEMICAL", "NEG_ENTITY"]  # labels stored under the "ents" key
+    colors = {'DISEASE': 'pink', 'CHEMICAL': 'orange', "NEG_ENTITY":'white'}  # per-label highlight color
+    options = {"ents": entities, "colors": colors}
+    return options  # passed as options= to displacy.render at the end of this section
+# Build a pipeline able to identify negation: loads the named model with its parser disabled, then adds a sentencizer and the "negex" component; returns the pipeline.
+def neg_model(nlp_model):
+    nlp = spacy.load(nlp_model, disable = ['parser'])  # nlp_model is handed straight to spacy.load
+#     nlp.add_pipe(nlp.create_pipe('sentencizer'))
+    nlp.add_pipe('sentencizer')  # added by name; presumably supplies sentence boundaries since the parser is disabled - confirm
+#     negex = Negex(nlp)
+    nlp.add_pipe(
+    "negex",  # NOTE(review): assumes negspacy is imported elsewhere so this factory name is registered - confirm
+    config={
+        "chunk_prefix": ["no"],
+    },
+    last=True)  # negex is placed at the end of the pipeline
+    return nlp
+def negation_handling(nlp_model, note, neg_model):  # returns the texts of entities whose negex flag is set; neg_model is a callable taking nlp_model and returning a pipeline
+    results = []
+    nlp = neg_model(nlp_model)  # neg_model(nlp_model) is invoked on every call to this function
+    note = note.split(".") #sentence tokenizing based on delimiter; NOTE(review): every "." splits, including ones inside abbreviations or decimals
+    note = [n.strip() for n in note] #removing extra spaces at the beginning and end of each sentence
+    for t in note:
+        doc = nlp(t)
+        for e in doc.ents:
+            rs = str(e._.negex)  # the negex extension value is compared through its string form
+            if rs == "True":
+                results.append(e.text)
+    return results  # list of entity texts in encounter order; repeats are not removed here
+#function to identify span objects of matched negative phrases from text: builds a PhraseMatcher holding one pattern per term under the given label
+def match(nlp,terms,label):
+    patterns = [nlp.make_doc(text) for text in terms]  # one pattern doc per term, built with nlp.make_doc
+    matcher = PhraseMatcher(nlp.vocab)
+    matcher.add(label, None, *patterns)  # NOTE(review): (key, None, *docs) looks like the older spaCy call form; current docs show matcher.add(label, patterns) - confirm against the installed version
+    return matcher
+#replacing the labels for identified negative entities: each accepted match becomes a new entity and existing entities overlapping it are dropped
+def overwrite_ent_lbl(matcher, doc):
+    matches = matcher(doc)
+    seen_tokens = set()  # token indices already covered by an accepted match
+    new_entities = []
+    entities = doc.ents
+    for match_id, start, end in matches:
+        if start not in seen_tokens and end - 1 not in seen_tokens:  # NOTE(review): only the first and last token are checked, so a match that fully encloses an earlier one is still accepted - confirm this cannot occur
+            new_entities.append(Span(doc, start, end, label=match_id))
+            entities = [e for e in entities if not (e.start < end and e.end > start)]  # keep only existing entities that do not overlap [start, end)
+            seen_tokens.update(range(start, end))
+    doc.ents = tuple(entities) + tuple(new_entities)
+    return doc  # the same doc object that was passed in, with its ents reassigned
+#deduplicate repeated entities: generator that yields each distinct stripped string once, in first-seen order
+def dedupe(items):
+    seen = set()  # stripped strings already yielded
+    for item in items:
+        item = str(item).strip()  # items are compared on their stripped string form
+        if item not in seen:
+            yield item
+            seen.add(item)
+lem_clinical_note= lemmatize(runtext, nlp0)  # runtext and nlp0 are defined outside this section
+#creating a doc object using BC5CDR model
+doc = nlp1(lem_clinical_note)  # presumably nlp1 is the BC5CDR pipeline loaded elsewhere - confirm
+options = get_entity_options()
+#list of negative concepts from clinical note identified by negspacy
+results0 = negation_handling("en_ner_bc5cdr_md", lem_clinical_note, neg_model)  # NOTE(review): neg_model calls spacy.load each time this line runs
+matcher = match(nlp1, results0,"NEG_ENTITY")
+#doc0: the doc object with the "NEG_ENTITY" label added (overwrite_ent_lbl returns the doc it was given)
+doc0 = overwrite_ent_lbl(matcher,doc)
+#visualizing identified Named Entities in clinical input text
+ent_html = displacy.render(doc0, style='ent', options=options)
 ##======================== End of NER Tagging ========================