Spaces:

ibaiGorordo
/

Lane-Shape-Prediction-with-Transformers

Runtime error

File size: 5,288 Bytes

fa14840

import sys
import cv2
import time
import numpy as np
import onnxruntime
print(onnxruntime.get_device())

# Color tuples used by LSTR.draw_lanes, indexed by lane id.
# NOTE(review): channel order (RGB vs BGR) depends on the image handed to
# draw_lanes -- confirm against the caller.
lane_colors = [
    (249, 65, 68),
    (243, 114, 44),
    (248, 150, 30),
    (249, 132, 74),
    (249, 199, 79),
    (144, 190, 109),
    (77, 144, 142),
    (39, 125, 161),
]

# 50 logarithmically spaced factors running from 1 (0.1**0) down to 0.01
# (0.1**2); LSTR.process_output uses them to sample each lane between its
# two vertical bounds.
log_space = np.logspace(0, 2, num=50, base=0.1, endpoint=True)

class LSTR():
    """Lane detector that runs an LSTR ONNX model through onnxruntime.

    Typical use::

        detector = LSTR(model_path)
        lanes, lane_ids = detector(image)
        overlay = detector.draw_lanes(image)
    """

    def __init__(self, model_path):
        """Load the ONNX model stored at ``model_path``."""
        # Start with empty results so draw_lanes() is safe to call before the
        # first detection (it then simply returns a copy of the image).
        self.lanes = []
        self.good_lanes = np.array([], dtype=int)

        # Initialize model
        self.model = self.initialize_model(model_path)

    def __call__(self, image):
        """Shortcut for :meth:`detect_lanes`."""
        return self.detect_lanes(image)

    def initialize_model(self, model_path):
        """Create the inference session and cache its input/output metadata.

        Returns:
            The created ``onnxruntime.InferenceSession`` (also stored in
            ``self.session``).
        """
        opts = onnxruntime.SessionOptions()
        opts.intra_op_num_threads = 16
        self.session = onnxruntime.InferenceSession(model_path, sess_options=opts)

        # Get model info
        self.getModel_input_details()
        self.getModel_output_details()

        return self.session

    def detect_lanes(self, image):
        """Run the full pipeline (preprocess, inference, decode) on ``image``.

        Returns:
            ``(detected_lanes, good_lanes)`` exactly as produced by
            :meth:`process_output`.
        """
        input_tensor, mask_tensor = self.prepare_inputs(image)

        outputs = self.inference(input_tensor, mask_tensor)

        detected_lanes, good_lanes = self.process_output(outputs)

        return detected_lanes, good_lanes

    def prepare_inputs(self, img):
        """Convert an H x W x C image into the two tensors the model expects.

        The original image size is remembered in ``self.img_height`` /
        ``self.img_width`` so decoded lanes can be scaled back to it.

        Returns:
            ``(input_tensor, mask_tensor)``: a float32 ``(1, C, H, W)`` image
            tensor at the model's input resolution and an all-zero float32
            ``(1, 1, H, W)`` mask.

        Raises:
            ValueError: if ``img`` is ``None`` (e.g. a failed ``cv2.imread``).
        """
        if img is None:
            raise ValueError("img is None; was the image loaded correctly?")

        self.img_height, self.img_width, self.img_channels = img.shape

        # NOTE(review): no BGR->RGB conversion is performed here, so the
        # per-channel statistics below are applied in the caller's channel
        # order -- confirm that callers pass the order the model expects.
        img = cv2.resize(img, (self.input_width, self.input_height))

        # Scale pixels to [0, 1], then standardize each channel with these
        # mean / std values.
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]
        img = (img / 255.0 - mean) / std

        # HWC -> CHW, then add the batch dimension.
        img = img.transpose(2, 0, 1)
        input_tensor = img[np.newaxis, :, :, :].astype(np.float32)

        mask_tensor = np.zeros((1, 1, self.input_height, self.input_width), dtype=np.float32)

        return input_tensor, mask_tensor

    def inference(self, input_tensor, mask_tensor):
        """Run the session on the image and mask tensors; return raw outputs."""
        feeds = {
            self.rgb_input_name: input_tensor,
            self.mask_input_name: mask_tensor,
        }
        return self.session.run(self.output_names, feeds)

    @staticmethod
    def softmax(x):
        """Compute the softmax of ``x`` along its last axis.

        Works for any number of leading (batch) dimensions. The maximum is
        subtracted per row, so a row lying far below the global maximum does
        not underflow to 0/0.
        """
        x = np.asarray(x)
        e_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
        # keepdims keeps the reduced axis so the division broadcasts row-wise.
        return e_x / e_x.sum(axis=-1, keepdims=True)

    def process_output(self, outputs):
        """Decode raw model outputs into lane point arrays.

        ``outputs[0]`` holds the class logits and ``outputs[1]`` the curve
        parameters (8 values per candidate: two vertical bounds followed by
        six curve coefficients). A candidate is kept when its most probable
        class is index 1.

        Returns:
            ``(lanes, good_lanes)``: ``lanes`` is a list of ``2 x 50`` integer
            arrays (row 0 = x, row 1 = y, in original-image pixels) and
            ``good_lanes`` contains the candidate index of each kept lane.
            Both are also stored on the instance for :meth:`draw_lanes`.
        """
        pred_logits = outputs[0]
        pred_curves = outputs[1]

        # Filter good lanes based on the probability
        prob = self.softmax(pred_logits)
        good_detections = np.where(np.argmax(prob, axis=-1) == 1)

        lanes = []
        for lane_data in pred_curves[good_detections]:
            lower, upper = lane_data[:2]
            k_2, f_2, m_2, n_1, b_2, b_3 = lane_data[2:]

            # Sample y between the two bounds, then evaluate the lane curve
            # x(y) in normalized coordinates.
            y_norm = lower + log_space * (upper - lower)
            shifted = y_norm - f_2
            x_norm = k_2 / shifted ** 2 + m_2 / shifted + n_1 + b_2 * y_norm - b_3

            # Scale back to the original image size.
            lane_points = np.vstack((x_norm * self.img_width,
                                     y_norm * self.img_height)).astype(int)
            lanes.append(lane_points)

        self.lanes = lanes
        # Second index array = candidate (query) index within the batch.
        self.good_lanes = good_detections[1]

        return lanes, self.good_lanes

    def getModel_input_details(self):
        """Cache the model's input names and its input height / width.

        The first input is treated as the image and the second as the mask;
        height and width are read from positions 2 and 3 of the first input's
        shape.
        """
        model_inputs = self.session.get_inputs()
        self.rgb_input_name = model_inputs[0].name
        self.mask_input_name = model_inputs[1].name

        self.input_shape = model_inputs[0].shape
        self.input_height = self.input_shape[2]
        self.input_width = self.input_shape[3]

    def getModel_output_details(self):
        """Cache the names of all model outputs in ``self.output_names``."""
        self.output_names = [output.name for output in self.session.get_outputs()]

    def draw_lanes(self, input_img):
        """Return a copy of ``input_img`` with the last detected lanes drawn.

        Every lane is drawn as a series of filled circles. When both lane id 5
        and lane id 0 were detected, the area between them is additionally
        shaded (the code treats them as the left / right boundary of the
        current lane).
        """
        # Write the detected line points in the image
        visualization_img = input_img.copy()

        # Draw a mask for the current lane
        right_lane = np.where(self.good_lanes == 0)[0]
        left_lane = np.where(self.good_lanes == 5)[0]

        if left_lane.size and right_lane.size:
            lane_segment_img = visualization_img.copy()

            # Left boundary top-to-bottom followed by the right boundary in
            # reverse order gives a closed polygon outline.
            points = np.vstack((self.lanes[left_lane[0]].T,
                                np.flipud(self.lanes[right_lane[0]].T)))
            cv2.fillConvexPoly(lane_segment_img, points, color=(0, 191, 255))
            visualization_img = cv2.addWeighted(visualization_img, 0.7, lane_segment_img, 0.3, 0)

        for lane_num, lane_points in zip(self.good_lanes, self.lanes):
            color = lane_colors[lane_num]
            for x, y in lane_points.T:
                # Hand OpenCV plain Python ints rather than NumPy scalars.
                cv2.circle(visualization_img, (int(x), int(y)), 3, color, -1)

        return visualization_img

if __name__ == '__main__':
    # Demo: detect lanes in a single image, show the overlay and save it.
    model_path = '../models/model_float32.onnx'
    image_path = "../dog_road.jpg"

    lane_detector = LSTR(model_path)

    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising, so
    # fail here with a clear message rather than deep inside the detector.
    if img is None:
        raise FileNotFoundError("Could not read image: {}".format(image_path))

    detected_lanes, lane_ids = lane_detector(img)
    print(lane_ids)

    lane_img = lane_detector.draw_lanes(img)
    cv2.namedWindow("Detected lanes", cv2.WINDOW_NORMAL)
    cv2.imshow("Detected lanes", lane_img)
    cv2.imwrite("out.jpg", lane_img)
    # Keep the window open until a key is pressed.
    cv2.waitKey(0)