Spaces:

flax-community
/

medclip-demo

Runtime error

File size: 2,416 Bytes

9b070b0
 
 
fed60af
 
9b070b0
fed60af
 
 
 
 
 
 
 
 
a46f695
 
fed60af
 
 
 
 
 
 
 
 
 
 
 
9b070b0
 
 
fed60af
9b070b0
fed60af
 
 
a46f695
fed60af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b070b0
 
 
 
 
119fb19
9b070b0
 
 
 
db27411
b191987
119fb19
67921c3
119fb19
 
9b070b0
db27411

import sys 

import gradio as gr
import jax
from huggingface_hub import snapshot_download
from PIL import Image
from transformers import AutoTokenizer
import torch
from torchvision.io import ImageReadMode, read_image


# Fetch the "flax-community/medclip" repository via huggingface_hub and add
# the returned location to the import search path.
# NOTE(review): the two imports below are presumably resolved from that
# downloaded snapshot, which is why they come after the sys.path update
# instead of sitting with the other imports - confirm before reordering.
LOCAL_PATH = snapshot_download("flax-community/medclip")
sys.path.append(LOCAL_PATH)

from src.modeling_medclip import FlaxMedCLIP
from run_medclip import Transform

# TorchScript-compiled preprocessing pipelines, keyed by image size.
# The size comes from the model config, so the pipeline is the same for every
# request served by a given model; compiling it once per size avoids repeating
# the scripting work on each call.
_SCRIPTED_TRANSFORMS = {}


def _get_scripted_transform(image_size):
    """Return the scripted ``Transform`` for ``image_size``, compiling it on first use."""
    # Lists are unhashable, so normalise them before using the size as a key.
    cache_key = tuple(image_size) if isinstance(image_size, list) else image_size
    if cache_key not in _SCRIPTED_TRANSFORMS:
        _SCRIPTED_TRANSFORMS[cache_key] = torch.jit.script(Transform(image_size))
    return _SCRIPTED_TRANSFORMS[cache_key]


def prepare_image(image_path, model):
    """Load an image file and turn it into a single-image NumPy batch.

    Args:
        image_path: Path of the image file; it is read in RGB mode.
        model: Object exposing ``config.vision_config.image_size``, which
            selects the preprocessing pipeline.

    Returns:
        A 4-D NumPy array holding the one preprocessed image.
    """
    image = read_image(image_path, mode=ImageReadMode.RGB)
    preprocess = _get_scripted_transform(model.config.vision_config.image_size)
    preprocessed_image = preprocess(image)
    # Add a leading batch axis, then move axis 1 to the end
    # (presumably channels-first -> channels-last; confirm against Transform).
    pixel_values = torch.stack([preprocessed_image]).permute(0, 2, 3, 1).numpy()
    return pixel_values

def prepare_text(text, tokenizer):
    """Tokenize ``text`` with ``tokenizer``, requesting NumPy ("np") tensors.

    Args:
        text: Caption handed to the tokenizer unchanged.
        tokenizer: Callable taking the text and a ``return_tensors`` keyword.

    Returns:
        Whatever the tokenizer call returns.
    """
    encoded = tokenizer(text, return_tensors="np")
    return encoded

def save_file_to_disk(uplaoded_file):
    """Write the uploaded image array to a temporary JPEG and return its path.

    Args:
        uplaoded_file: Array accepted by ``Image.fromarray``.

    Returns:
        The path the image was saved to.
    """
    # NOTE(review): the destination is one fixed path, so every call
    # overwrites the previous image; requests handled at the same time would
    # share this file.
    destination = "/tmp/image.jpeg"
    Image.fromarray(uplaoded_file).save(destination)
    return destination

def load_tokenizer_and_model():
    """Load the tokenizer and the MedCLIP model used by the demo.

    Returns:
        A ``(tokenizer, model)`` tuple: the "allenai/scibert_scivocab_uncased"
        tokenizer and the ``FlaxMedCLIP`` model loaded from ``LOCAL_PATH``.
    """
    # Tuple elements are evaluated left to right: tokenizer first, then model.
    return (
        AutoTokenizer.from_pretrained("allenai/scibert_scivocab_uncased"),
        FlaxMedCLIP.from_pretrained(LOCAL_PATH),
    )

def run_inference(image_path, text, model, tokenizer):
    """Score an image/caption pair with the model.

    Args:
        image_path: Path of the image file to score.
        text: Caption to compare against the image.
        model: Callable model; invoked with the token ids, the pixel values,
            the attention mask, ``train=False`` and ``return_dict=True``.
        tokenizer: Tokenizer used to encode ``text``.

    Returns:
        The sigmoid of the first entry of ``logits_per_image``.
    """
    image_batch = prepare_image(image_path, model)
    encoded_text = prepare_text(text, tokenizer)
    outputs = model(
        encoded_text["input_ids"],
        image_batch,
        attention_mask=encoded_text["attention_mask"],
        train=False,
        return_dict=True,
    )
    # Only one image and one caption are scored, so entry [0][0] is returned.
    probabilities = jax.nn.sigmoid(outputs["logits_per_image"])
    return probabilities[0][0]

# Load the tokenizer and model once, when the module is executed; they are
# kept as module-level globals and read by score_image_caption_pair.
tokenizer, model = load_tokenizer_and_model()

def score_image_caption_pair(uploaded_file, text_input):
    """Gradio callback: score how well a caption matches an uploaded image.

    Args:
        uploaded_file: Image array received from the interface.
        text_input: Caption typed by the user.

    Returns:
        A dict with the single key "Score" holding the score converted
        with ``.tolist()``.
    """
    # The inference pipeline reads the image from a file path, so the upload
    # is written to disk first.
    image_path = save_file_to_disk(uploaded_file)
    raw_score = run_inference(image_path, text_input, model, tokenizer)
    return {"Score": raw_score.tolist()}


# Build the demo UI: an image upload plus a free-text caption in, a label out.
# NOTE(review): gr.inputs.Image, allow_flagging=False and allow_screenshot
# look like legacy Gradio API - confirm the installed Gradio version still
# accepts them.
image = gr.inputs.Image(shape=(299, 299))
demo_description = """
    The purpose of this demo is to help medical students measure their diagnostic capabilities in purely academic settings.
    Under no circumstances should it be used to make a self-diagnosis or confront a real doctor.
    """
iface = gr.Interface(
    fn=score_image_caption_pair,
    inputs=[image, "text"],
    outputs=["label"],
    allow_flagging=False,
    allow_screenshot=False,
    title="Your personal TA",
    description=demo_description,
)
# Start serving the interface as soon as the module is executed.
iface.launch()