import logging

import gradio as gr
import tensorflow as tf
from transformers import AutoTokenizer
from transformers import TFAutoModelForTokenClassification

# Checkpoint name handed to both from_pretrained calls below, so the tokenizer
# and the model always come from the same checkpoint.
model_name = "d4data/biomedical-ner-all"
# Tokenizer for the checkpoint; predict_ner uses it to encode the input text
# and to map the resulting ids back to token strings.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# TensorFlow token-classification model. from_pt=True asks transformers to
# load the weights from a PyTorch checkpoint.
model = TFAutoModelForTokenClassification.from_pretrained(model_name, from_pt=True)


# Label id that predict_ner treats as "part of an entity".
# NOTE(review): what id 1 actually denotes depends on the checkpoint's label
# map (model.config.id2label) — confirm this is the intended class.
_ENTITY_LABEL_ID = 1

_logger = logging.getLogger(__name__)


def _merge_wordpieces(tokens, predictions):
    """Fold "##" continuation pieces into the preceding word.

    Each merged word keeps the prediction of its first piece.

    Args:
        tokens: Token strings, in order.
        predictions: One label id per token, aligned with ``tokens``.

    Returns:
        A ``(words, labels)`` pair of equally long lists.
    """
    words, labels = [], []
    for token, prediction in zip(tokens, predictions):
        # The `words` guard avoids indexing an empty list if the very first
        # token happens to be a continuation piece.
        if token.startswith("##") and words:
            words[-1] += token[2:]
        else:
            words.append(token)
            labels.append(prediction)
    return words, labels


def _group_entities(words, labels):
    """Join each maximal run of consecutive entity-labelled words with spaces.

    Args:
        words: Word strings, in order.
        labels: One label id per word, aligned with ``words``.

    Returns:
        A list with one space-joined string per run of words whose label
        equals ``_ENTITY_LABEL_ID``.
    """
    entities, current = [], []
    for word, label in zip(words, labels):
        if label == _ENTITY_LABEL_ID:
            current.append(word)
        elif current:
            # Any non-entity label closes the run, so two runs separated by
            # other labels are never glued together and no empty string is
            # ever emitted.
            entities.append(" ".join(current))
            current = []
    if current:
        entities.append(" ".join(current))
    return entities


def predict_ner(text):
    """Run the token-classification model on ``text`` and collect entities.

    The text is encoded with the module-level ``tokenizer``, scored by the
    module-level ``model``, and the highest-scoring label id is taken for each
    token. Word pieces are merged back into words, and consecutive words
    labelled ``_ENTITY_LABEL_ID`` are joined into one entity string.

    Args:
        text: Input text to analyse.

    Returns:
        A list of entity strings, or the string
        ``"No named entities found."`` when the list would be empty.
    """
    inputs = tokenizer.encode(text, return_tensors="tf")
    outputs = model(inputs)[0]
    # Index of the highest-scoring label along the last (label) axis.
    predictions = tf.argmax(outputs, axis=2)

    tokens = tokenizer.convert_ids_to_tokens(inputs[0])
    new_tokens, new_predictions = _merge_wordpieces(
        tokens, predictions[0].numpy()
    )
    entities = _group_entities(new_tokens, new_predictions)

    # Gradio has no `debug` function; use the standard logging module so the
    # diagnostics are available without crashing the request.
    _logger.debug("inputs: %s", inputs)
    _logger.debug("outputs: %s", outputs)
    _logger.debug("predictions: %s", predictions)
    _logger.debug("tokens: %s", tokens)
    _logger.debug("new_tokens: %s", new_tokens)
    _logger.debug("new_predictions: %s", new_predictions)
    _logger.debug("entities: %s", entities)

    return entities or "No named entities found."


# One text box in, one text box out: the submitted text is passed to
# predict_ner and its return value is displayed in the output box.
iface = gr.Interface(
    fn=predict_ner,
    # Use the top-level gr.Textbox component: the gr.inputs / gr.outputs
    # namespaces are deprecated in Gradio 3 and no longer exist in Gradio 4.
    inputs=gr.Textbox(label="Input Text"),
    outputs=gr.Textbox(label="Named Entities"),
)

# Start serving the interface.
iface.launch(debug=True)