"""Gradio demo that extracts named entities with a token-classification model."""

import logging

import gradio as gr
import tensorflow as tf
from transformers import AutoTokenizer
from transformers import TFAutoModelForTokenClassification

logger = logging.getLogger(__name__)

model_name = "d4data/biomedical-ner-all"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = TFAutoModelForTokenClassification.from_pretrained(model_name, from_pt=True)

NO_ENTITIES_MESSAGE = "No named entities found."


def _merge_wordpieces(tokens, labels):
    """Join "##" continuation pieces onto the preceding token.

    Each merged word keeps the label of its first piece; labels of the
    continuation pieces are discarded.

    Returns:
        A ``(words, word_labels)`` pair of equal-length lists.
    """
    words, word_labels = [], []
    for token, label in zip(tokens, labels):
        # The `words` guard avoids an IndexError when a continuation piece
        # shows up with nothing before it; such a piece is kept verbatim.
        if token.startswith("##") and words:
            words[-1] += token[2:]
        else:
            words.append(token)
            word_labels.append(label)
    return words, word_labels


def _group_entities(words, labels, outside_label="O"):
    """Collapse consecutive entity-labelled words into space-joined strings.

    A new entity starts when a label carries a ``B-`` prefix or when the
    entity type differs from the previous word's; a word labelled
    *outside_label* closes the entity in progress.

    Returns:
        The entity strings in order of appearance (never containing "").
    """
    entities = []
    current_words = []
    current_type = None
    for word, label in zip(words, labels):
        if label == outside_label:
            if current_words:
                entities.append(" ".join(current_words))
                current_words = []
            current_type = None
            continue

        prefix, separator, entity_type = label.partition("-")
        if not separator:
            # No "B-"/"I-" prefix: treat the whole label as the entity type.
            prefix, entity_type = "", label

        starts_new_entity = prefix == "B" or entity_type != current_type
        if starts_new_entity and current_words:
            entities.append(" ".join(current_words))
            current_words = []

        current_words.append(word)
        current_type = entity_type

    if current_words:
        entities.append(" ".join(current_words))
    return entities


def predict_ner(text):
    """Return the named entities the model finds in *text*.

    Args:
        text: Raw input string from the UI.

    Returns:
        A list of entity strings in order of appearance, or the string
        "No named entities found." when the list would be empty.
    """
    if not text or not text.strip():
        return NO_ENTITIES_MESSAGE

    # truncation=True asks the tokenizer to cap the sequence at its configured
    # maximum length, so over-long inputs are cut rather than passed through.
    inputs = tokenizer.encode(text, return_tensors="tf", truncation=True)
    outputs = model(inputs)[0]
    predictions = tf.argmax(outputs, axis=2)

    token_ids = inputs[0].numpy().tolist()
    tokens = tokenizer.convert_ids_to_tokens(token_ids)

    # Map class ids to label names instead of hard-coding a single id.
    # NOTE(review): assumes the checkpoint's config uses BIO-style labels with
    # "O" for non-entity tokens — confirm against model.config.id2label.
    id2label = model.config.id2label
    # Drop structural tokens added by the tokenizer so they never end up
    # inside an entity string. [UNK] is kept because it stands for real text.
    structural_ids = {
        tokenizer.cls_token_id,
        tokenizer.sep_token_id,
        tokenizer.pad_token_id,
    }
    kept_tokens, kept_labels = [], []
    for token_id, token, label_id in zip(token_ids, tokens, predictions[0].numpy()):
        if token_id in structural_ids:
            continue
        kept_tokens.append(token)
        kept_labels.append(id2label[int(label_id)])

    new_tokens, new_predictions = _merge_wordpieces(kept_tokens, kept_labels)
    entities = _group_entities(new_tokens, new_predictions)

    # gradio has no `debug` function; use the standard logging module.
    logger.debug("inputs: %s", inputs)
    logger.debug("outputs: %s", outputs)
    logger.debug("predictions: %s", predictions)
    logger.debug("tokens: %s", tokens)
    logger.debug("new_tokens: %s", new_tokens)
    logger.debug("new_predictions: %s", new_predictions)
    logger.debug("entities: %s", entities)

    return entities if entities else NO_ENTITIES_MESSAGE


iface = gr.Interface(
    fn=predict_ner,
    # Top-level components replace the legacy gr.inputs / gr.outputs namespaces.
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Named Entities"),
)

iface.launch(debug=True)