# NOTE: removed non-Python page residue that preceded the code (HuggingFace
# Spaces status text, file size, commit-hash dump, and a line-number gutter
# from a copy-paste) — it was not valid Python and broke the module.
import gradio as gr
import torch
# from sahi.prediction import ObjectPrediction
# from sahi.utils.cv import visualize_object_predictions, read_image
import os
import requests
import json
import cv2
from PIL import Image
from huggingface_hub import hf_hub_download
from ultralyticsplus import YOLO, render_result
# from ultralyticsplus import render_result
# import requests
# import cv2
# Example rows for the image-inference interface. Each row matches the
# interface inputs: [image path, model name, image size, conf threshold,
# IOU threshold]. Every example shares the same model and thresholds, so
# only the file names are listed and the rows are generated from them.
_example_files = [
    '2a998cfb0901db5f8210.jpg',
    '2ce19ce0191acb44920b.jpg',
    '2daab6ea3310e14eb801.jpg',
    '4a137deefb14294a7005 (1).jpg',
    '7e77c596436c9132c87d.jpg',
    '170f914014bac6e49fab.jpg',
    '3355ec3269c8bb96e2d9.jpg',
    '546306a88052520c0b43.jpg',
    '33148464019ed3c08a8f.jpg',
    'a17a992a1cd0ce8e97c1.jpg',
    'b5db5e42d8b80ae653a9 (1).jpg',
    'b8ee1f5299a84bf612b9.jpg',
    'b272fec7783daa63f32c.jpg',
    'bb202b3eaec47c9a25d5.jpg',
    'bf1e22b0a44a76142f5b.jpg',
    'ea5473c5f53f27617e2e.jpg',
    'ee106392e56837366e79.jpg',
    'f88d2214a4ee76b02fff.jpg',
]
image_path = [
    [f'test_images/{fname}', 'cham_diem_yolov8', 640, 0.25, 0.45]
    for fname in _example_files
]
# Load the shared YOLO model once at import time; both the image and the
# video inference functions below use this module-level instance.
# (Earlier model revisions kept for reference.)
# model = YOLO('linhcuem/cham_diem_yolov8')
model = YOLO('linhcuem/chamdiemgianhang_yolov8_ver1')
# model = YOLO('linhcuem/cham_diem_yolov8_ver20')
###################################################
def yolov8_img_inference(
    image,
    model_path=None,
    image_size=640,
    conf_threshold=0.25,
    iou_threshold=0.45,
):
    """Run YOLOv8 detection on one image and count detections per class.

    Parameters:
        image: input image (PIL image from the Gradio component).
        model_path: unused; kept for backward compatibility with the
            Gradio interface signature (the module-level `model` is used).
        image_size: inference image size passed to `model.predict`.
        conf_threshold: minimum detection confidence.
        iou_threshold: IOU threshold for NMS.

    Returns:
        A tuple of (rendered image with boxes drawn,
        dict mapping class name -> number of detections).
    """
    model.conf = conf_threshold
    model.iou = iou_threshold
    results = model.predict(image, imgsz=image_size, conf=conf_threshold, iou=iou_threshold)
    render = render_result(model=model, image=image, result=results[0])
    # Mapping of class id -> class name provided by the model.
    names = model.names
    # Count detections per class id. The original version reset ids that
    # were missing from `names` back to 1 on every hit and then crashed
    # with a KeyError when labelling them; a plain counting dict fixes both.
    counts = {}
    for r in results:
        for c in r.boxes.cls:
            cid = int(c)
            counts[cid] = counts.get(cid, 0) + 1
    # Report only classes that were actually detected, labelled by name
    # when known (fall back to the raw class id otherwise).
    present_objects = {names.get(k, k): v for k, v in counts.items()}
    return render, present_objects
# results = model.predict(image, imgsz=image_size, return_outputs=True)
# results = model.predict(image)
# object_prediction_list = []
# for _, image_results in enumerate(results):
# if len(image_results)!=0:
# image_predictions_in_xyxy_format = image_results['det']
# for pred in image_predictions_in_xyxy_format:
# x1, y1, x2, y2 = (
# int(pred[0]),
# int(pred[1]),
# int(pred[2]),
# int(pred[3]),
# )
# bbox = [x1, y1, x2, y2]
# score = pred[4]
# category_name = model.model.names[int(pred[5])]
# category_id = pred[5]
# object_prediction = ObjectPrediction(
# bbox=bbox,
# category_id=int(category_id),
# score=score,
# category_name=category_name,
# )
# object_prediction_list.append(object_prediction)
# image = read_image(image)
# output_image = visualize_object_predictions(image=image, object_prediction_list=object_prediction_list)
# return output_image['image']
# render = render_result(model=model, image=image, result=results[0])
def yolov8_vid_inference(video_path):
    """Yield RGB frames of `video_path` with detection boxes drawn.

    Runs the module-level YOLO model on every frame, draws each detected
    box in red, and yields the annotated frame converted BGR -> RGB for
    display in the Gradio image output.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of stream or read error: stop instead of looping
                # forever (the original had no exit on a failed read).
                break
            annotated = frame.copy()
            outputs = model.predict(source=frame)
            results = outputs[0].cpu().numpy()
            for det in results.boxes.xyxy:
                cv2.rectangle(
                    annotated,
                    (int(det[0]), int(det[1])),
                    (int(det[2]), int(det[3])),
                    color=(0, 0, 255),
                    thickness=2,
                    lineType=cv2.LINE_AA,
                )
            yield cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)
    finally:
        # Release the capture even if the consumer abandons the generator.
        cap.release()
# Video-inference tab: a video file in, a stream of annotated frames out
# (yolov8_vid_inference is a generator, so the image output updates per frame).
inputs_vid = [
    # NOTE(review): `type="filepath"` on Video matches the older Gradio
    # component API — confirm against the pinned gradio version.
    gr.components.Video(type="filepath", label="Input Video"),
]
outputs_vid = [
    gr.components.Image(type="numpy", label="Output Image"),
]
interface_vid = gr.Interface(
    fn=yolov8_vid_inference,
    inputs = inputs_vid,
    outputs = outputs_vid,
    title = "Detect Thiên Việt productions",
    cache_examples = False,
)
# Input components for the image-inference tab.
# NOTE(review): this list is currently unused — interface_image below
# builds its own inputs inline; kept for backward compatibility.
inputs_image = [
    # gr.inputs.Image(type="filepath", label="Input Image"),
    gr.Image(type="pil"),
    # Fixed: the single dropdown choice duplicated the namespace
    # ("linhcuem/linhcuem/..."), so the default value was not a valid choice.
    gr.Dropdown(["linhcuem/chamdiemgianhang_yolov8_ver1"],
                default="linhcuem/chamdiemgianhang_yolov8_ver1", label="Model"),
    gr.Slider(maximum=1280, step=32, value=640, label="Image Size"),
    gr.Slider(maximum=1.0, step=0.05, value=0.25, label="Confidence Threshold"),
    gr.Slider(maximum=1.0, step=0.05, value=0.45, label="IOU Threshold"),
]
# outputs_image =gr.outputs.Image(type="filepath", label="Output Image")
# count_obj = gr.Textbox(show_label=False)
title = "Detect Thiên Việt productions"
# Image-inference tab: one image in, rendered detections + per-class
# counts out. Inputs mirror the yolov8_img_inference signature.
interface_image = gr.Interface(
    fn=yolov8_img_inference,
    inputs=[
        gr.Image(type='pil'),
        gr.Dropdown(["linhcuem/chamdiemgianhang_yolov8_ver1"],
                    default="linhcuem/chamdiemgianhang_yolov8_ver1"),
        gr.Slider(maximum=1280, step=32, value=640),
        gr.Slider(maximum=1.0, step=0.05, value=0.25),
        gr.Slider(maximum=1.0, step=0.05, value=0.45),
    ],
    outputs=[gr.Image(type="pil"), gr.Textbox(show_label=False)],
    title=title,
    examples=image_path,
    # Only cache when examples exist; bool(...) is the idiomatic form of
    # the original `True if image_path else False`.
    cache_examples=bool(image_path),
)
# Combine both tabs into one app; queue() enables the generator-based
# video streaming output, then launch the server.
gr.TabbedInterface(
    [interface_image, interface_vid],
    tab_names=['Image inference', 'Video inference']
).queue().launch()
# interface_image.launch(debug=True, enable_queue=True)