import glob
import os

import gradio as gr
import mediapipe as mp
import numpy as np
import torch
from transformers import AutoModelForImageClassification, ViTImageProcessor

# ViT checkpoint fine-tuned on SIBI (Sistem Isyarat Bahasa Indonesia,
# the Indonesian sign language system) hand-sign images.
feature_extractor = ViTImageProcessor.from_pretrained('ArdyL/VIT_SIBI_ALL')
model = AutoModelForImageClassification.from_pretrained('ArdyL/VIT_SIBI_ALL')

mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Any *.jpg next to this script is offered as a clickable example in the UI.
examples_dir = './'
example_files = glob.glob(os.path.join(examples_dir, '*.jpg'))


def preprocess(im):
    """Reduce an RGB image to its MediaPipe hand skeleton, drawn black on white."""
    # static_image_mode=True: each call handles a single standalone frame,
    # so detection should run every time instead of relying on tracking.
    with mp_hands.Hands(static_image_mode=True,
                        min_detection_confidence=0.3,
                        min_tracking_confidence=0.3) as hands:
        results = hands.process(im)

    # White uint8 canvas with the same shape as the input image.
    annotated_image = np.full(np.asarray(im).shape, 255, dtype=np.uint8)

    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            mp_drawing.draw_landmarks(
                annotated_image,
                hand_landmarks,
                mp_hands.HAND_CONNECTIONS,
                mp_drawing.DrawingSpec(color=(0, 0, 0), thickness=2, circle_radius=2),
                mp_drawing.DrawingSpec(color=(0, 0, 0), thickness=2, circle_radius=2),
            )

    # Return uint8 pixels as-is; ViTImageProcessor rescales to [0, 1] itself,
    # so dividing by 255 here would normalize twice.
    return annotated_image


def classify_image(image):
    preprocessed = preprocess(image)
    model.eval()
    with torch.no_grad():
        inputs = feature_extractor(images=preprocessed, return_tensors="pt")
        logits = model(**inputs).logits
    # Map logits to per-class confidences so gr.Label can show the top 5.
    probs = logits.softmax(-1)[0]
    return {model.config.id2label[i]: float(p) for i, p in enumerate(probs)}


with gr.Blocks(title="ViT - SIBI Classifier") as demo:
    with gr.Tab("Upload Image", id='upload-image'):
        with gr.Row():
            upload_image = gr.Image(
                type="numpy", image_mode="RGB", shape=(224, 224))
            output_label = gr.Label(label="Result", num_top_classes=5)
        with gr.Row():
            send_btn = gr.Button("Translate")
        if example_files:
            gr.Examples(examples=example_files, inputs=upload_image)
        send_btn.click(fn=classify_image, inputs=upload_image,
                       outputs=output_label)

    with gr.Tab("Capture Image", id='capture-image'):
        with gr.Row():
            stream_image = gr.Image(
                type="numpy", source='webcam', image_mode="RGB",
                shape=(224, 224))
            output_label2 = gr.Label(label="Result", num_top_classes=5)
        with gr.Row():
            send_btn2 = gr.Button("Translate")
        send_btn2.click(fn=classify_image, inputs=stream_image,
                        outputs=output_label2)

# demo.queue(concurrency_count=3)
demo.launch(debug=True)
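
# A minimal sketch of how to exercise classify_image() without the UI,
# assuming a sample image 'a.jpg' (hypothetical filename) sits next to this
# script. Comment out demo.launch() above before running it this way:
#
#   from PIL import Image
#   img = np.array(Image.open('a.jpg').convert('RGB'))
#   print(classify_image(img))  # prints a {label: confidence} dict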