"""Gradio demo that compares two face images using CLIP embeddings.

Both images are embedded with the ``clip-ViT-B-16`` SentenceTransformer
model; the cosine similarity between the two embeddings is compared against
``SIMILARITY_THRESHOLD`` to decide which "unlock" message to display.
"""

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr

# Cosine-similarity score above which the two images are reported as showing
# the same person.
SIMILARITY_THRESHOLD = 0.65

# Loaded once at import time so every request reuses the same model instance.
model = SentenceTransformer("sentence-transformers/clip-ViT-B-16")


def predict(im1, im2):
    """Score how similar two images are and pick the matching message.

    Args:
        im1: First image; the UI below supplies PIL images (``type="pil"``).
        im2: Second image, same format as ``im1``.

    Returns:
        A ``(similarity, message)`` tuple: the cosine similarity between the
        two image embeddings as a float, and the text to show for it.

    Raises:
        gr.Error: If either image is missing.
    """
    # An image slot that was left empty is expected to arrive as None; fail
    # with a readable message rather than an opaque encoder error.
    if im1 is None or im2 is None:
        raise gr.Error("Please provide two images before running the comparison.")

    image_embs = model.encode([im1, im2])
    # cosine_similarity returns the full pairwise matrix; entry [0][1] is the
    # score between the first and the second image. Convert the NumPy scalar
    # to a plain float for the Number output component.
    sim = float(cosine_similarity(image_embs)[0][1])

    if sim > SIMILARITY_THRESHOLD:
        return sim, "SAME PERSON, UNLOCK PHONE"
    return sim, "DIFFERENT PEOPLE, DON'T UNLOCK"


with gr.Blocks() as demo:
    gr.Markdown(
        "Based on two images, the goal is to recognize the "
        "similarities/differences between facial images and determine "
        "whether or not to unlock a phone based on a cosine similarity score."
    )

    # Tab 1: compare two uploaded images (or one of the example pairs).
    with gr.Tab("Image"):
        with gr.Row():
            with gr.Column():
                img_inputs = [
                    gr.Image(type="pil", source="upload"),
                    gr.Image(type="pil", source="upload"),
                ]
                examples = gr.Examples(
                    [
                        [
                            "https://live.staticflickr.com/2883/33785597726_47880fa539_b.jpg",
                            "https://live.staticflickr.com/65535/49086637987_f7622c3345.jpg",
                        ],
                        [
                            "https://live.staticflickr.com/3423/3197571945_123937185f_b.jpg",
                            "https://live.staticflickr.com/7259/7001667239_11cece02c8_b.jpg",
                        ],
                        [
                            "https://live.staticflickr.com/4015/4334237247_08af133b4b_b.jpg",
                            "https://live.staticflickr.com/3701/9364116426_87b8918e9d_b.jpg",
                        ],
                    ],
                    inputs=img_inputs,
                )
                btn = gr.Button("Run")
            with gr.Column():
                # The output components are created inside this column
                # context, which is what places them in the second column.
                btn.click(
                    fn=predict,
                    inputs=img_inputs,
                    outputs=[
                        gr.Number(label="Similarity"),
                        gr.Textbox(label="Message"),
                    ],
                )

    # Tab 2: same comparison, with both images captured from the webcam.
    # NOTE: `img_inputs` and `btn` are rebound here; the "Image" tab's click
    # handler was already wired to the earlier components above.
    with gr.Tab("Webcam"):
        with gr.Row():
            with gr.Column():
                img_inputs = [
                    gr.Image(type="pil", source="webcam"),
                    gr.Image(type="pil", source="webcam"),
                ]
                btn = gr.Button("Run")
            with gr.Column():
                btn.click(
                    fn=predict,
                    inputs=img_inputs,
                    outputs=[
                        gr.Number(label="Similarity"),
                        gr.Textbox(label="Message"),
                    ],
                )

demo.launch(debug=True)