import gradio as gr


def get_process_config():
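    """Return Gradio components for the process configuration options."""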
    return {
        "process.numactl": gr.Checkbox(
            value=False,
            label="process.numactl",
            info="Runs the model with numactl",
        ),
        "process.numactl_kwargs": gr.Textbox(
            value="",
            label="process.numactl_kwargs",
            info="Additional python dict of kwargs to pass to numactl",
        ),
    }


def get_pytorch_config():
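    """Return Gradio components for the PyTorch backend options."""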
    return {
        "pytorch.torch_dtype": gr.Dropdown(
            value="float32",
            label="pytorch.torch_dtype",
            choices=["bfloat16", "float16", "float32", "auto"],
            info="The dtype to use for the model",
        ),
        "pytorch.torch_compile": gr.Checkbox(
            value=False,
            label="pytorch.torch_compile",
            info="Compiles the model with torch.compile",
        ),
    }


def get_onnxruntime_config():
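    """Return Gradio components for the ONNX Runtime backend options."""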
    return {
        "onnxruntime.export": gr.Checkbox(
            value=True,
            label="onnxruntime.export",
            info="Exports the model to ONNX",
        ),
        "onnxruntime.use_cache": gr.Checkbox(
            value=True,
            label="onnxruntime.use_cache",
            info="Uses cached ONNX model if available",
        ),
        "onnxruntime.use_merged": gr.Checkbox(
            value=True,
            label="onnxruntime.use_merged",
            info="Uses merged ONNX model if available",
        ),
        "onnxruntime.torch_dtype": gr.Dropdown(
            value="float32",
            label="onnxruntime.torch_dtype",
            choices=["bfloat16", "float16", "float32", "auto"],
            info="The dtype to use for the model",
        ),
    }


def get_openvino_config():
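    """Return Gradio components for the OpenVINO backend options."""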
    return {
        "openvino.export": gr.Checkbox(
            value=True,
            label="openvino.export",
            info="Exports the model to ONNX",
        ),
        "openvino.use_cache": gr.Checkbox(
            value=True,
            label="openvino.use_cache",
            info="Uses cached ONNX model if available",
        ),
        "openvino.use_merged": gr.Checkbox(
            value=True,
            label="openvino.use_merged",
            info="Uses merged ONNX model if available",
        ),
        "openvino.reshape": gr.Checkbox(
            value=False,
            label="openvino.reshape",
            info="Reshapes the model to the input shape",
        ),
        "openvino.half": gr.Checkbox(
            value=False,
            label="openvino.half",
            info="Converts model to half precision",
        ),
    }


def get_inference_config():
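    """Return Gradio components for the inference benchmark options."""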
    return {
        "inference.warmup_runs": gr.Slider(
            step=1,
            value=10,
            minimum=0,
            maximum=10,
            label="inference.warmup_runs",
            info="Number of warmup runs",
        ),
        "inference.duration": gr.Slider(
            step=1,
            value=10,
            minimum=0,
            maximum=10,
            label="inference.duration",
            info="Minimum duration of the benchmark in seconds",
        ),
        "inference.iterations": gr.Slider(
            step=1,
            value=10,
            minimum=0,
            maximum=10,
            label="inference.iterations",
            info="Minimum number of iterations of the benchmark",
        ),
        "inference.latency": gr.Checkbox(
            value=True,
            label="inference.latency",
            info="Measures the latency of the model",
        ),
        "inference.memory": gr.Checkbox(
            value=False,
            label="inference.memory",
            info="Measures the peak memory consumption",
        ),
    }
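

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative, not part of the original module): each
# getter returns a dict mapping a config key to a Gradio component, so the
# dicts can be instantiated inside a gr.Blocks context and grouped per
# section. The accordion layout and the __main__ guard are assumptions about
# how an app might assemble these configs.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    CONFIG_GETTERS = {
        "process": get_process_config,
        "pytorch": get_pytorch_config,
        "onnxruntime": get_onnxruntime_config,
        "openvino": get_openvino_config,
        "inference": get_inference_config,
    }

    with gr.Blocks() as demo:
        components = {}
        for section, getter in CONFIG_GETTERS.items():
            with gr.Accordion(label=f"{section} config", open=False):
                # Components created inside the Blocks/Accordion context are
                # rendered automatically; the dict keeps their config keys so
                # the values can later be collected into a benchmark config.
                components.update(getter())

    demo.launch()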