# ViT - SIBI classifier — Gradio demo (HuggingFace Space)
from transformers import ViTImageProcessor, AutoModelForImageClassification
import torch
import gradio as gr
import os
import glob
import mediapipe as mp
import numpy as np
from PIL import Image

# Pretrained ViT fine-tuned on the SIBI (Indonesian sign-language) alphabet.
# Loaded once at startup; downloads from the HF hub on first run.
feature_extractor = ViTImageProcessor.from_pretrained('ArdyL/VIT_SIBI_ALL')
model = AutoModelForImageClassification.from_pretrained('ArdyL/VIT_SIBI_ALL')

# MediaPipe Hands: detects hand landmarks so preprocess() can draw the
# hand skeleton that the classifier was trained on.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils

# Example images shipped next to this script (*.jpg).
examples_dir = './'
example_files = glob.glob(os.path.join(examples_dir, '*.jpg'))
def preprocess(im):
    """Render the detected hand landmarks of *im* onto a white canvas.

    Parameters
    ----------
    im : np.ndarray
        RGB image, shape (H, W, 3), as produced by the Gradio image widgets.

    Returns
    -------
    np.ndarray
        Float array of the same shape with values in [0, 1]: white
        background with the hand skeleton drawn in black.  If no hand is
        detected the canvas is returned blank (all ones).
    """
    # static_image_mode=True: we classify independent still frames, not a
    # video stream, so cross-frame landmark tracking must be disabled
    # (min_tracking_confidence is ignored in static mode).
    with mp_hands.Hands(static_image_mode=True,
                        min_detection_confidence=0.3,
                        min_tracking_confidence=0.3) as hands:
        results = hands.process(im)

        # Start from an all-white float canvas of the input's shape.
        annotated_image = np.full(np.asarray(im).shape, 255.0)

        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(
                        color=(0, 0, 0), thickness=2, circle_radius=2),
                    mp_drawing.DrawingSpec(
                        color=(0, 0, 0), thickness=2, circle_radius=2),
                )
        # Rescale to [0, 1] for the downstream image processor.
        annotated_image[...] /= 255
        return annotated_image
def classify_image(image):
    """Classify a SIBI hand-sign image.

    Parameters
    ----------
    image : np.ndarray
        RGB image from a Gradio image widget.

    Returns
    -------
    dict[str, float]
        Mapping of class label -> softmax probability.  Returning the full
        confidence map lets the ``gr.Label(num_top_classes=5)`` outputs
        actually display their top-5 classes (a bare string showed only one).
    """
    preprocessed_image = preprocess(image)
    model.eval()  # inference mode: disables dropout etc.
    with torch.no_grad():
        inputs = feature_extractor(
            images=preprocessed_image, return_tensors="pt")
        logits = model(**inputs).logits
    probabilities = torch.softmax(logits, dim=-1)[0]
    return {model.config.id2label[i]: float(p)
            for i, p in enumerate(probabilities)}
# --- Gradio UI: two tabs (file upload / webcam capture), both wired to
# classify_image and a top-5 label output. ---
with gr.Blocks(title=">ViT - SIBI Classifier") as demo:
    with gr.Tab("Upload Image", id='upload-image'):
        with gr.Row():
            upload_input = gr.Image(
                type="numpy", image_mode="RGB", shape=(224, 224))
            upload_result = gr.Label(label="Hasil", num_top_classes=5)
        with gr.Row():
            upload_btn = gr.Button("Terjemahkan")
            upload_btn.click(
                fn=classify_image, inputs=upload_input, outputs=upload_result)
    with gr.Tab("Capture Image", id='capture-image'):
        with gr.Row():
            webcam_input = gr.Image(
                type="numpy", source='webcam', image_mode="RGB",
                shape=(224, 224))
            webcam_result = gr.Label(label="Hasil", num_top_classes=5)
        with gr.Row():
            webcam_btn = gr.Button("Terjemahkan")
            webcam_btn.click(
                fn=classify_image, inputs=webcam_input, outputs=webcam_result)

# demo.queue(concurrency_count=3)
demo.launch(debug=True)