"""Streamlit demo page for named entity recognition.

Loads the ``pnr-svc/distilbert-turkish-ner`` token-classification model,
runs it on the text typed into the page, and renders the recognized
entities as colored annotations followed by the raw pipeline output.
"""

import streamlit as st
from annotated_text import annotated_text
import transformers

# Highlight color for each entity group shown in the annotated output.
ENTITY_TO_COLOR = {
    'DepositProduct': '#edff87',
    'Product': '#d586ff',
    'ProductProblemInfo': '#9886ff',
    'ServiceInformation': '#ff9886',
    'ServiceClosest': '#ff86b0',
    'Location': '#d461be',
    'ServiceNumber': '#f9cde4',
    'Brand': '#ffd4a4',
    'Campaign': '#bcffd8',
    'ProductSelector': '#fb5d4e',
    'SpecialCampaign': '#f56286',
}

# Neutral highlight for entity groups that have no entry in ENTITY_TO_COLOR,
# so an unexpected label is still displayed instead of raising KeyError.
DEFAULT_ENTITY_COLOR = '#dddddd'


@st.cache(allow_output_mutation=True, show_spinner=False)
def get_pipe():
    """Build the token-classification pipeline used by the page.

    The function is wrapped in ``st.cache`` so the model and tokenizer are
    not re-created on every script rerun.

    Returns:
        A ``transformers`` "token-classification" pipeline configured with
        ``aggregation_strategy="simple"``.
    """
    model_name = "pnr-svc/distilbert-turkish-ner"
    model = transformers.AutoModelForTokenClassification.from_pretrained(model_name)
    tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
    pipe = transformers.pipeline(
        "token-classification",
        model=model,
        tokenizer=tokenizer,
        aggregation_strategy="simple",
    )
    return pipe


def parse_text(text, prediction):
    """Interleave plain text with annotated entity tuples.

    Args:
        text: The string that was passed to the pipeline.
        prediction: Iterable of dicts, each providing the keys ``"start"``,
            ``"end"``, ``"word"`` and ``"entity_group"``.
            NOTE(review): entries are consumed in the given order and are
            assumed to be sorted by ``"start"`` and non-overlapping --
            confirm against the pipeline output.

    Returns:
        A list alternating between plain ``str`` segments of ``text`` and
        ``(word, entity_group, color)`` tuples, ending with the text that
        follows the last entity. With an empty ``prediction`` the result is
        ``[text]``.
    """
    start = 0
    parsed_text = []
    for p in prediction:
        # Plain text between the previous entity and this one.
        parsed_text.append(text[start:p["start"]])
        group = p["entity_group"]
        # Fall back to a neutral color for labels missing from the table.
        color = ENTITY_TO_COLOR.get(group, DEFAULT_ENTITY_COLOR)
        parsed_text.append((p["word"], group, color))
        start = p["end"]
    # Whatever remains after the last entity.
    parsed_text.append(text[start:])
    return parsed_text


st.set_page_config(page_title="NER ARÇELİK")
st.title("Named Entity Recognition")
st.write("Type text into the text box and then press 'Predict' to get the named entities.")

default_text = "tekirdağ çerkezköy arçelik yetkili servis no paylaş"

text = st.text_area('Enter text here:', value=default_text)
# The button's return value is not consulted below: the prediction runs on
# every script run in which the text box is non-empty, whether or not the
# button was the widget that was clicked.
submit = st.button('Predict')

with st.spinner("Loading model..."):
    pipe = get_pipe()

# The earlier condition "(submit and non-empty) or non-empty" reduces to
# "non-empty", so only the text content decides whether to predict.
if text.strip():
    prediction = pipe(text)

    parsed_text = parse_text(text, prediction)

    st.header("Prediction:")
    annotated_text(*parsed_text)

    st.header('Raw values:')
    st.json(prediction)