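# Gradio demo: classify SIBI (Sistem Isyarat Bahasa Indonesia) hand signs.
# MediaPipe extracts hand landmarks, which are redrawn on a white canvas
# and classified by the ViT model loaded below.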
from transformers import ViTImageProcessor, AutoModelForImageClassification

import torch
import gradio as gr
import os
import glob
import mediapipe as mp
import numpy as np
from PIL import Image

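# Load the image processor and ViT classifier from the Hugging Face Hub.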
feature_extractor = ViTImageProcessor.from_pretrained('ArdyL/VIT_SIBI_ALL')
model = AutoModelForImageClassification.from_pretrained('ArdyL/VIT_SIBI_ALL')

# MediaPipe Hands detects hand landmarks; drawing_utils renders them.
mp_hands = mp.solutions.hands
mp_drawing = mp.solutions.drawing_utils
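# Bundled .jpg files that can serve as example inputs (not wired up below).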
examples_dir = './'
example_files = glob.glob(os.path.join(examples_dir, '*.jpg'))


def preprocess(im):
    """Detect hand landmarks in `im` and redraw them on a white canvas.

    The model is fed only the drawn landmark skeleton, not the raw photo.
    """
    # static_image_mode=True treats each call as an independent photo,
    # so no tracking confidence is needed.
    with mp_hands.Hands(static_image_mode=True,
                        min_detection_confidence=0.3) as hands:
        # Gradio delivers `im` as an RGB numpy array, which is what
        # MediaPipe expects.
        results = hands.process(im)

        # White uint8 canvas the same size as the input image.
        annotated_image = np.full(im.shape, 255, dtype=np.uint8)
        if results.multi_hand_landmarks:
            for hand_landmarks in results.multi_hand_landmarks:
                mp_drawing.draw_landmarks(
                    annotated_image, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(0, 0, 0), thickness=2, circle_radius=2),
                    mp_drawing.DrawingSpec(color=(0, 0, 0), thickness=2, circle_radius=2),
                )
    # Return uint8 pixels in [0, 255]; the ViT image processor does its
    # own rescaling, so dividing by 255 here would double-scale.
    return annotated_image


def classify_image(image):
    preprocessed_image = preprocess(image)
    model.eval()
    with torch.no_grad():
        inputs = feature_extractor(
            images=preprocessed_image, return_tensors="pt")
        outputs = model(**inputs)

    # Turn the logits into per-class probabilities so gr.Label can show
    # the top predictions with their confidences.
    probs = outputs.logits.softmax(-1)[0]
    return {model.config.id2label[i]: float(p) for i, p in enumerate(probs)}


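# Two-tab UI: upload a photo or capture one from the webcam; both tabs
# feed the same classify_image function.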
with gr.Blocks(title=">ViT - SIBI Classifier") as demo:
    with gr.Tab("Upload Image", id='upload-image'):
        with gr.Row():
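            # shape=(224, 224) resizes inputs to the ViT's 224x224 input
            # resolution (Gradio 3.x Image API).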
            uploadImage = gr.Image(
                type="numpy", image_mode="RGB", shape=(224, 224))
            output_label = gr.Label(label="Result", num_top_classes=5)
        with gr.Row():
            send_btn = gr.Button("Translate")
            send_btn.click(fn=classify_image, inputs=uploadImage,
                           outputs=output_label)

    with gr.Tab("Capture Image", id='capture-image'):
        with gr.Row():
            streamImage = gr.Image(
                type="numpy", source='webcam', image_mode="RGB", shape=(224, 224))
            output_label2 = gr.Label(label="Result", num_top_classes=5)
        with gr.Row():
            send_btn2 = gr.Button("Translate")
            send_btn2.click(fn=classify_image,
                            inputs=streamImage, outputs=output_label2)


# demo.queue(concurrency_count=3)
demo.launch(debug=True)