# --- Export artifact: the following lines are Hugging Face web-viewer chrome
# --- captured when this file was scraped, preserved here as comments.
# Sasidhar's picture
# Update app.py
# b8d77bb
# raw / history / blame
# 8.03 kB
import os

# Bug fix: this must be set BEFORE torch/transformers load their native
# OpenMP runtimes, otherwise the duplicate-lib guard may already have fired.
# (The original set it after `from transformers import ...`.)
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

import time
from io import StringIO

import pandas as pd
import plotly.express as px
import streamlit as st
from annotated_text import annotated_text
from streamlit_option_menu import option_menu
from streamlit_text_annotation import text_annotation
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# set_page_config must be the first Streamlit command in the script.
st.set_page_config(layout="wide")
@st.cache(allow_output_mutation=True)
def init_text_summarization_model():
    """Build and cache the BART-large-CNN summarization pipeline.

    Cached via ``st.cache`` so the model download/load happens once per
    process rather than on every Streamlit rerun.
    """
    return pipeline("summarization", model='facebook/bart-large-cnn')
@st.cache(allow_output_mutation=True)
def init_zsl_topic_classification():
    """Build and cache the zero-shot topic-classification pipeline.

    Returns
    -------
    tuple
        ``(pipe, template)`` where ``pipe`` is a BART-large-MNLI
        zero-shot-classification pipeline and ``template`` is the
        hypothesis template string.

    Bug fix: the original file contained two byte-identical definitions of
    this function back to back; the redundant duplicate has been removed.
    """
    MODEL = 'facebook/bart-large-mnli'
    pipe = pipeline("zero-shot-classification", model=MODEL)
    template = "This text is about {}."
    return pipe, template
@st.cache(allow_output_mutation=True)
def init_ner_pipeline():
    """Build and cache the biomedical NER pipeline.

    Uses the ``d4data/biomedical-ner-all`` checkpoint with
    ``aggregation_strategy="simple"`` so subword pieces are merged into
    whole-entity spans.  Pass ``device=0`` to ``pipeline`` if a GPU is
    available.
    """
    checkpoint = "d4data/biomedical-ner-all"
    ner_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    ner_model = AutoModelForTokenClassification.from_pretrained(checkpoint)
    return pipeline(
        "ner",
        model=ner_model,
        tokenizer=ner_tokenizer,
        aggregation_strategy="simple",
    )
@st.cache(allow_output_mutation=True)
def init_qa_pipeline():
    """Build and cache the extractive question-answering pipeline
    (RoBERTa-base fine-tuned on SQuAD 2.0)."""
    return pipeline("question-answering", model='deepset/roberta-base-squad2')
def get_formatted_text_for_annotation(output, source_text=None):
    """Interleave NER entities with plain text for ``annotated_text``.

    Parameters
    ----------
    output : list[dict]
        Entities from the HF NER pipeline, each with ``start``, ``end``
        and ``entity_group`` keys, ordered by ``start``.
    source_text : str, optional
        The text the entities were extracted from.  Defaults to the
        module-level global ``text`` — the original implementation read
        that global implicitly, so the fallback keeps existing call
        sites working unchanged.

    Returns
    -------
    tuple
        Plain string segments interleaved with
        ``(entity_text, entity_group, colour)`` triples.
    """
    colour_map = {'Coreference': '#29D93B',
                  'Severity': '#FCF3CF',
                  'Sex': '#E9F7EF',
                  'Sign_symptom': '#EAF2F8',
                  'Detailed_description': '#078E8B',
                  'Date': '#F5EEF8',
                  'History': '#FDEDEC',
                  'Medication': '#F4F6F6',
                  'Therapeutic_procedure': '#A3E4D7',
                  'Age': '#85C1E9',
                  'Subject': '#D7BDE2',
                  'Biological_structure': '#AF7AC5',
                  'Activity': '#B2BABB',
                  'Lab_value': '#E6B0AA',
                  'Family_history': '#2471A3',
                  'Diagnostic_procedure': '#CCD1D1',
                  'Other_event': '#239B56',
                  'Occupation': '#B3B6B7'}
    if source_text is None:
        source_text = text  # original behaviour: module-level global

    annotated_texts = []
    next_index = 0
    for entity in output:
        start, end = entity['start'], entity['end']
        if start > next_index:
            # Bug fix: the original sliced to start-1, silently dropping
            # the character immediately before every entity.
            annotated_texts.append(source_text[next_index:start])
        group = entity['entity_group']
        annotated_texts.append((source_text[start:end], group, colour_map[group]))
        # Bug fix: the original advanced to end+1, skipping the character
        # immediately after every entity.
        next_index = end
    if next_index < len(source_text):
        # Bug fix: the original emitted text[next_index-1:len(text)-1],
        # duplicating one character and dropping the final one.
        annotated_texts.append(source_text[next_index:])
    return tuple(annotated_texts)
# Model initialization
# Each init_* factory is wrapped in st.cache, so the heavy model loads run
# once per process instead of on every Streamlit script rerun.
pipeline_summarization = init_text_summarization_model()
pipeline_zsl, template = init_zsl_topic_classification()
pipeline_ner =init_ner_pipeline()
pipeline_qa = init_qa_pipeline()
st.header("Intelligent Document Automation")

# Sidebar navigation: the selected entry drives the if/elif dispatch below.
with st.sidebar:
    selected_menu = option_menu(
        "Select Option",
        ["Upload Document", "Extract Text", "Summarize Document",
         "Extract Entities", "Detected Barriers", "Get Answers",
         "Annotation Tool"],
        menu_icon="cast",
        default_index=0,
    )
if selected_menu == "Upload Document":
    uploaded_file = st.file_uploader("Choose a file")
    if uploaded_file is not None:
        # NOTE(review): get_text_from_ocr_engine is not defined anywhere in
        # this file — presumably supplied elsewhere; confirm before running.
        ocr_text = get_text_from_ocr_engine()
        st.write("Upload Successful")
elif selected_menu == "Extract Text":
with st.spinner("Extracting Text..."):
time.sleep(6)
st.write(get_text_from_ocr_engine())
elif selected_menu == "Summarize Document":
paragraphs= get_paragraphs_for_summaries()
with st.spinner("Finding Topics..."):
tags_found = ["Injury Details", "Past Medical Conditions", "Injury Management Plan", "GP Correspondence"]
time.sleep(5)
st.write("This document is about:")
st.markdown(";".join(["#" + tag + " " for tag in tags_found]) + "**")
st.markdown("""---""")
with st.spinner("Summarizing Document..."):
for text in paragraphs:
summary_text = pipeline_summarization(text, max_length=130, min_length=30, do_sample=False)
# Show output
st.write(summary_text[0]['summary_text'])
st.markdown("""---""")
elif selected_menu == "Extract Entities":
paragraphs= get_paragraphs_for_entities()
with st.spinner("Extracting Entities..."):
for text in paragraphs:
output = pipeline_ner (text)
entities_text =get_formatted_text_for_annotation(output)
annotated_text(*entities_text)
st.markdown("""---""")
elif selected_menu == "Detected Barriers":
#st.subheader('Barriers Detected')
barriers_to_detect = {"Chronic Pain":"Is the patint experiencing chronic pain?",
"Mental Health Issues":"Does he have any mental issues?",
"Prior History":"What is prior medical history?",
"Smoking":"Does he smoke?",
"Drinking":"Does he drink?",
"Comorbidities":"Does he have any comorbidities?"}
with st.spinner("Detecting Barriers..."):
for barrier,question_text in barriers_to_detect.items():
context = get_text_from_ocr_engine()
if question_text:
result = pipeline_qa(question=question_text, context=context)
st.subheader(barrier)
#st.text(result)
if result['score'] < 0.3:
st.text("Not Found")
else:
st.text(result['answer'])
elif selected_menu == "Get Answers":
st.subheader('Question')
question_text = st.text_input("Type your question")
context = get_text_from_ocr_engine()
if question_text:
with st.spinner("Finding Answer(s)..."):
result = pipeline_qa(question=question_text, context=context)
st.subheader('Answer')
st.text(result['answer'])
elif selected_menu == "Annotation Tool":
data1 = {
"tokens": [
{"text": "He", "labels": ["Person"]},
{"text": "loves"},
{"text": "his"},
{"text": "dog", "labels": ["Animal", "Pet"]},
],
"labels": [
{"text": "Person"},
{"text": "Action"},
{"text": "Animal"},
]
}
st.subheader("Display Mode:")
left, right = st.columns(2)
with left:
st.text("Vertical labels:")
text_annotation(data1)
with right:
st.text("Horizontal labels:")
data1["labelOrientation"] = "horizontal"
text_annotation(data1)
data2 = {
"allowEditing": True,
"tokens": [
{"text": "He", "labels": ["Pronoun", "Person"]},
{"text": "loves", "labels": ["Action"]},
{"text": "his"},
{"text": "dog", "labels": ["Animal"]},
],
"labels": [
{"text": "Pronoun", "style": {
"color": "red",
"background-color": "white",
"font-size": "8px",
"border": "3px dashed red",
}},
{"text": "Verb", "style": {
"color": "green",
"background-color": "white",
"font-size": "8px",
"font-weight": "900",
}},
{"text": "Noun", "style": {
"color": "blue",
"background-color": "white",
"font-size": "8px",
}},
{"text": "Person"},
{"text": "Animal"},
]
}
st.subheader("Edit Mode:")
data = text_annotation(data2)
if data:
"Returned data:", data