mailseth committed on
Commit
c7ccc16
1 Parent(s): 829cd93

Upload 12 files

Browse files
3x_first_seg_yolov5l-int8_segment_0_of_2_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7387092fd873e805240a61881cda3302c52109cdfc745f1fcdcf73dda75465f7
3
+ size 33500640
3x_first_seg_yolov5l-int8_segment_1_of_2_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0bf32bfc27d442bc00f67bd2f5eaf26056daab553758ff1f3f5849da1a29861
3
+ size 15636224
all_segments_yolov5l-int8_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08c3e11fd79704aa166096080432f5ad31a9cc4a934d170580b24acbb245d7db
3
+ size 49246112
all_segments_yolov5l-int8_segment_0_of_3_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:962166de27dbd083e807fe052c7ba35df96737952d25399fb9083334079d7865
3
+ size 11043712
all_segments_yolov5l-int8_segment_1_of_3_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc1e7732872ac2401c291b97b17d22c5ce2db58e4afc49a4ff1f8aa84dec70f
3
+ size 21701888
all_segments_yolov5l-int8_segment_2_of_3_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:200be3ddb51265022ce0caad8c184f8c4c8c2079f7a433d6ef8efd3aa7fb9973
3
+ size 16404800
all_segments_yolov8n_352_608px_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c6d143218d28353da1271e2e1150a86e9e557040360ad2038d298debcf42f99
3
+ size 4062609
all_segments_yolov8n_384_640px_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e37eb6264cbd77a82cd9f73495000d96a9a36fbc46db59f018682eb75960820
3
+ size 3789856
all_segments_yolov8s_384_608px_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcfe313f9994623af61ef44329648961365e79714dcf18ce533c8e3a41f70864
3
+ size 11886272
dumb_yolov5l-int8_segment_0_of_2_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b906754242f3836a1d7a1fff1642feedf127c2c88a70dcf53017963a3a42ec6
3
+ size 24789888
dumb_yolov5l-int8_segment_1_of_2_edgetpu.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4abcc82a8d4bb6ae79544569662fdf75c5135e96095fac17f4808ad5f6e92cf8
3
+ size 24305824
segment_and_test.py ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import subprocess
3
+ import time
4
+ import shutil
5
+ import re
6
+ import hashlib
7
+
8
+ #'''
9
+ fn_list = [
10
+ 'tf2_ssd_mobilenet_v2_coco17_ptq',
11
+ 'ssd_mobilenet_v2_coco_quant_postprocess',
12
+ 'ssdlite_mobiledet_coco_qat_postprocess',
13
+ 'ssd_mobilenet_v1_coco_quant_postprocess',
14
+ 'tf2_ssd_mobilenet_v1_fpn_640x640_coco17_ptq',
15
+ 'efficientdet_lite0_320_ptq',
16
+ 'efficientdet_lite1_384_ptq',
17
+ 'efficientdet_lite2_448_ptq',
18
+ 'efficientdet_lite3_512_ptq',
19
+ 'efficientdet_lite3x_640_ptq',
20
+ 'yolov5n-int8',
21
+ 'yolov5s-int8',
22
+ 'yolov5m-int8',
23
+ 'yolov5l-int8',
24
+
25
+ ['yolov8n_416_640px', 'yolov8n_384_640px', 'yolov8n_384_608px', 'yolov8n_352_608px'],
26
+ ['yolov8s_416_640px', 'yolov8s_384_640px', 'yolov8s_384_608px', 'yolov8s_352_608px'],
27
+ ['yolov8m_416_640px', 'yolov8m_384_640px', 'yolov8m_384_608px', 'yolov8m_352_608px'],
28
+ ['yolov8l_416_640px', 'yolov8l_384_640px', 'yolov8l_384_608px', 'yolov8l_352_608px'],
29
+
30
+ ['yolov9t_416_640px', 'yolov9t_384_640px', 'yolov9t_384_608px', 'yolov9t_352_608px', 'yolov9t_352_576px'],
31
+ ['yolov9s_416_640px', 'yolov9s_384_640px', 'yolov9s_384_608px', 'yolov9s_352_608px', 'yolov9s_352_576px'],
32
+ ['yolov9m_416_640px', 'yolov9m_384_640px', 'yolov9m_384_608px', 'yolov9m_352_608px', 'yolov9m_352_576px'],
33
+ ['yolov9c_416_640px', 'yolov9c_384_640px', 'yolov9c_384_608px', 'yolov9c_352_608px', 'yolov9c_352_576px'],
34
+
35
+ 'ipcam-general-v8'
36
+ ]
37
+
38
+ custom_args = {
39
+ 'tf2_ssd_mobilenet_v2_coco17_ptq': {
40
+ 2: ["--diff_threshold_ns","100000"]},
41
+ 'ssd_mobilenet_v2_coco_quant_postprocess': {
42
+ 5: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs","--partition_search_step","3"]},
43
+ 'ssdlite_mobiledet_coco_qat_postprocess': {
44
+ 2: ["--diff_threshold_ns","100000"]},
45
+ 'efficientdet_lite3_512_ptq': {
46
+ 2: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs"],
47
+ 3: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs"],
48
+ 4: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs"],
49
+ 5: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs"],
50
+ 6: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs"],
51
+ 7: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs"]},
52
+ 'efficientdet_lite3x_640_ptq': {
53
+ 5: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs","--partition_search_step","2"],
54
+ 6: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs","--partition_search_step","3"]},
55
+ 'yolov5n-int8': {
56
+ 5: ["--partition_search_step","2"],
57
+ 6: ["--partition_search_step","2"],
58
+ 7: ["--partition_search_step","2"],
59
+ 8: ["--partition_search_step","2"]},
60
+ 'yolov5s-int8': {
61
+ 5: ["--partition_search_step","2"],
62
+ 6: ["--partition_search_step","2"],
63
+ 7: ["--partition_search_step","2"],
64
+ 8: ["--partition_search_step","2"]},
65
+ 'yolov5m-int8': {
66
+ 5: ["--partition_search_step","2"],
67
+ 6: ["--partition_search_step","2"],
68
+ 7: ["--partition_search_step","2"],
69
+ 8: ["--partition_search_step","2"]},
70
+ 'yolov5l-int8': {
71
+ 5: ["--undefok=enable_multiple_subgraphs","--enable_multiple_subgraphs","--partition_search_step","2"],
72
+ 6: ["--partition_search_step","2"],
73
+ 7: ["--partition_search_step","2"],
74
+ 8: ["--partition_search_step","2"]},
75
+ 'yolov8m_416_640px': {
76
+ 5: ["--partition_search_step","2"],
77
+ 6: ["--partition_search_step","3"],
78
+ 7: ["--partition_search_step","4"],
79
+ 8: ["--partition_search_step","5"]},
80
+ 'yolov8l_416_640px': {
81
+ 4: ["--partition_search_step","2"],
82
+ 5: ["--partition_search_step","2"],
83
+ 6: ["--partition_search_step","3"],
84
+ 7: ["--partition_search_step","4"],
85
+ 8: ["--partition_search_step","5"]},
86
+ 'yolov9c_416_640px': {
87
+ 2: ["--delegate_search_step","10"]},
88
+ 'yolov9c_384_640px': {
89
+ 1: ["--delegate_search_step","10"],
90
+ 2: ["--delegate_search_step","10"]},
91
+ 'yolov9c_384_608px': {
92
+ 1: ["--delegate_search_step","10"],
93
+ 2: ["--delegate_search_step","10"]},
94
+ 'yolov9c_352_608px': {
95
+ 1: ["--delegate_search_step","10"],
96
+ 2: ["--delegate_search_step","10"]},
97
+ 'yolov9c_352_576px': {
98
+ 1: ["--delegate_search_step","10"],
99
+ 2: ["--delegate_search_step","10"]}}#'''
100
+
101
+ '''
102
+ fn_list = [
103
+ # 'yolov5n-int8',
104
+ # 'yolov5s-int8',
105
+ # 'yolov5m-int8',
106
+ # 'yolov5l-int8',
107
+ # 'yolov8n_full_integer_quant',
108
+ # 'yolov8s_full_integer_quant',
109
+ # 'yolov8m_full_integer_quant',
110
+ # 'yolov8l_full_integer_quant',
111
+ # 'yolov8n_480px',
112
+ # 'yolov8s_480px',
113
+ # 'yolov8m_480px',
114
+ # 'yolov8l_480px',
115
+ # 'yolov8n_512px',
116
+ # 'yolov8s_512px',
117
+ # 'yolov8m_512px',
118
+ # 'yolov8l_512px',
119
+ # 'yolov8s_544px',
120
+ # 'yolov8m_544px', # lg 1st seg
121
+ # 'yolov8l_544px', # lg 1st seg
122
+ # 'yolov8s_576px',
123
+ # 'yolov8m_576px', # lg 1st seg
124
+ # 'yolov8l_576px', # lg 1st seg
125
+ # 'yolov8s_608px',
126
+ # 'yolov8m_608px', # lg 1st seg
127
+ # 'yolov8l_608px',
128
+ # 'yolov8n_640px',
129
+ # 'yolov8s_640px',
130
+ # 'yolov8m_640px', # lg 1st seg
131
+ # 'yolov8l_640px', # lg 1st seg
132
+ # 'yolov8n_416_640px', # lg 1st seg
133
+ 'yolov8s_416_640px', # lg 1st seg
134
+ 'yolov8m_416_640px', # lg 1st seg
135
+ 'yolov8l_416_640px'] # lg 1st seg
136
+ # 'ipcam-general-v8'] #'''
137
+
138
+ '''
139
+ custom_args = {
140
+ 'yolov8n_full_integer_quant': {
141
+ 2: ["--diff_threshold_ns","100000"],
142
+ 3: ["--diff_threshold_ns","200000"]},
143
+ 'yolov8s_full_integer_quant': {
144
+ 2: ["--diff_threshold_ns","200000"]},
145
+ 'yolov8l_full_integer_quant': {
146
+ 5: ["--partition_search_step","2"]},
147
+ 'yolov8n_480px': {
148
+ 2: ["--diff_threshold_ns","100000"],
149
+ 3: ["--diff_threshold_ns","200000"]},
150
+ 'yolov8s_480px': {
151
+ 2: ["--diff_threshold_ns","200000"]},
152
+ 'yolov8m_480px': {
153
+ 5: ["--partition_search_step","2"]},
154
+ 'yolov8n_512px': {
155
+ 2: ["--diff_threshold_ns","1200000"],
156
+ 3: ["--diff_threshold_ns","600000"]},
157
+ 'yolov8s_512px': {
158
+ 2: ["--diff_threshold_ns","200000"]},
159
+ 'yolov8m_640px': {
160
+ 2: ["--diff_threshold_ns","200000", "--undefok=timeout_sec","--timeout_sec=360"]},
161
+ 'yolov8l_640px': {
162
+ 2: ["--undefok=timeout_sec","--timeout_sec=360"]},
163
+ 'yolov8n_416_640px': {
164
+ 5: ["--partition_search_step","2"]},
165
+ 'yolov8s_416_640px': {
166
+ 5: ["--partition_search_step","2"]},
167
+ 'yolov8m_416_640px': {
168
+ 5: ["--initial_lower_bound_ns","44658311","--initial_upper_bound_ns","45466138","--partition_search_step","2"],
169
+ 6: ["--initial_lower_bound_ns","39444004","--initial_upper_bound_ns","40071927","--partition_search_step","3"],
170
+ 7: ["--initial_lower_bound_ns","36028652","--initial_upper_bound_ns","37012866","--partition_search_step","4"],
171
+ 8: ["--initial_lower_bound_ns","33892323","--initial_upper_bound_ns","34856571","--partition_search_step","5"]},
172
+ 'yolov8l_416_640px': {
173
+ 5: ["--initial_lower_bound_ns","82297482","--initial_upper_bound_ns","82892528","--partition_search_step","2"],
174
+ 6: ["--initial_lower_bound_ns","69966647","--initial_upper_bound_ns","70757195","--partition_search_step","3"],
175
+ 7: ["--initial_lower_bound_ns","69067450","--initial_upper_bound_ns","69599451","--partition_search_step","4"],
176
+ 8: ["--initial_lower_bound_ns","55889854","--initial_upper_bound_ns","56444625","--partition_search_step","5"]}}#'''
177
+
178
+ '''
179
+ diff_threshold_ns = {
180
+ 'yolov8s_416_640px': {
181
+ 2: 4000000},
182
+ 'yolov8m_416_640px': {
183
+ 4: 40000000,
184
+ 5: 30000000},
185
+ 'yolov8l_416_640px': {
186
+ 7: 90000000,
187
+ 8: 70000000}}#'''
188
+
189
+ '''
190
+ custom_args = {
191
+ 'yolov8m_416_640px': {
192
+ 5: ["--partition_search_step","2"],
193
+ 6: ["--partition_search_step","3"],
194
+ 7: ["--partition_search_step","4"],
195
+ 8: ["--partition_search_step","5"]},
196
+ 'yolov8l_416_640px': {
197
+ 4: ["--partition_search_step","2"],
198
+ 5: ["--partition_search_step","2"],
199
+ 6: ["--partition_search_step","3"],
200
+ 7: ["--partition_search_step","4"],
201
+ 8: ["--partition_search_step","5"]}}#'''
202
+
203
+ seg_dir = "/home/seth/Documents/all_segments/"
204
+ seg_types = ['', '2x_first_seg/', '15x_first_seg/', '3x_first_seg/', '4x_first_seg/', '15x_last_seg/', '2x_last_seg/', 'dumb/']
205
+
206
+
207
def seg_exists(filename, segment_type, segment_count, base_dir=None):
    """Build the expected compiled-segment paths for a model and check for them on disk.

    Args:
        filename: Model base name (no ``.tflite`` suffix).
        segment_type: Sub-directory prefix that is concatenated verbatim between
            the base directory and the file name (so it should end in ``/`` when
            non-empty). The alias ``'orig_code'`` is treated as ``''``.
        segment_count: Number of segments. ``1`` selects the single-file name
            ``<filename>_edgetpu.tflite``; any other value selects
            ``<filename>_segment_<i>_of_<segment_count>_edgetpu.tflite`` for
            ``i`` in ``range(segment_count)``.
        base_dir: Directory prefix, concatenated verbatim (so it should end in
            ``/``). Defaults to the module-level ``seg_dir``.

    Returns:
        A ``(seg_list, file_missing)`` tuple: the list of expected paths and a
        bool that is ``True`` when at least one of those paths does not exist.
    """
    if base_dir is None:
        base_dir = seg_dir

    if segment_type == 'orig_code':
        segment_type = ''

    prefix = base_dir + segment_type + filename
    if segment_count == 1:
        seg_list = [prefix + '_edgetpu.tflite']
    else:
        seg_list = [
            '{}_segment_{}_of_{}_edgetpu.tflite'.format(prefix, i, segment_count)
            for i in range(segment_count)
        ]

    # Generator form stops at the first missing file and builds no temporary list.
    file_missing = any(not os.path.exists(path) for path in seg_list)
    return (seg_list, file_missing)
216
+
217
+ MAX_TPU_COUNT = 5
218
+
219
+ '''
220
+ # Generate segment files
221
+ for sn in range(1,MAX_TPU_COUNT+1):
222
+ flat_fn_list = []
223
+ for fn in fn_list:
224
+ if isinstance(fn, list):
225
+ flat_fn_list += fn
226
+ else:
227
+ flat_fn_list.append(fn)
228
+
229
+
230
+ for fn in flat_fn_list:
231
+ for seg_type in seg_types:
232
+ seg_list, file_missing = seg_exists(fn, seg_type, sn)
233
+
234
+ if not file_missing:
235
+ continue
236
+
237
+ if sn == 1:
238
+ cmd = ["/usr/bin/edgetpu_compiler","-s","-d","--out_dir",seg_dir+seg_type,seg_dir+fn+".tflite"]
239
+ elif 'dumb' in seg_type:
240
+ cmd = ["/usr/bin/edgetpu_compiler","-s","-d","-n",str(sn),"--out_dir",seg_dir+seg_type,seg_dir+fn+".tflite"]
241
+ elif 'saturated' in seg_type:
242
+ try:
243
+ cmd = ["libcoral/out/k8/tools/partitioner/partition_with_profiling","--output_dir",seg_dir+seg_type,"--edgetpu_compiler_binary",
244
+ "/usr/bin/edgetpu_compiler","--model_path",seg_dir+fn+".tflite","--num_segments",str(sn),
245
+ "--diff_threshold_ns", str(diff_threshold_ns[fn][sn])]
246
+ except:
247
+ # Note: "Saturated segments" is an attempt to load as much of the model as possible onto segments
248
+ # while ignoring the latency incurred by slower segments. We assume we'll be able to "speed up"
249
+ # these slower segments simply by running more copies of them. The faster segments ideally will
250
+ # be optimized to all run at roughly the same speed. Thus the overall inference throughput will
251
+ # be limited by how many multiples of the slowest segment we can run.
252
+ #
253
+ # diff_threshold_ns key entries only exist where we want to create "saturated segments". More would
254
+ # mean the model is too sparse across segments. We create saturated segments by adjusting the
255
+ # diff_threshold_ns until the compiler just starts pushing parameters off of the TPUs. Ideally
256
+ # this will result in one or two slow segments and the rest of the segments are roughly equally
257
+ # fast.
258
+ continue
259
+
260
+ else:
261
+ if '2x_first_seg' in seg_type:
262
+ #+++ b/coral/tools/partitioner/profiling_based_partitioner.cc
263
+ #@@ -190,6 +190,8 @@ int64_t ProfilingBasedPartitioner::PartitionCompileAndAnalyze(
264
+ # latencies = std::get<2>(coral::BenchmarkPartitionedModel(
265
+ # tmp_edgetpu_segment_paths, &edgetpu_contexts(), kNumInferences));
266
+ #+ latencies[0] /= 2;
267
+ # if (kUseCache) {
268
+ # for (int i = 0; i < num_segments_; ++i) {
269
+ # segment_latency_cache_[{segment_starts[i], num_ops[i]}] = latencies[i];
270
+ #@@ -211,10 +213,11 @@ std::pair<int64_t, int64_t> ProfilingBasedPartitioner::GetBounds(
271
+ # num_segments_, /*search_delegate=*/true,
272
+ # delegate_search_step))
273
+ # << "Can not compile initial partition.";
274
+ #- const auto latencies = std::get<2>(coral::BenchmarkPartitionedModel(
275
+ #+ auto latencies = std::get<2>(coral::BenchmarkPartitionedModel(
276
+ # tmp_edgetpu_segment_paths, &edgetpu_contexts(), kNumInferences));
277
+ #
278
+ # DeleteFolder(tmp_dir);
279
+ #+ latencies[0] /= 4;
280
+ #
281
+ # int64_t lower_bound = std::numeric_limits<int64_t>::max(), upper_bound = 0;
282
+ # for (auto latency : latencies) {
283
+ #
284
+ # sudo make DOCKER_IMAGE="ubuntu:20.04" DOCKER_CPUS="k8" DOCKER_TARGETS="tools" docker-build
285
+
286
+ #// Encourage each segment slower than the previous to spread out the bottlenecks
287
+ #double latency_adjust = 1.0;
288
+ #for (int i = 1; i < num_segments_; ++i)
289
+ #{
290
+ # if (latencies[i-1] < latencies[i])
291
+ # latency_adjust *= 0.97;
292
+ # latencies[i-1] *= latency_adjust;
293
+ #}
294
+ #latencies[num_segments_-1] *= latency_adjust;
295
+
296
+ partition_with_profiling_dir = "libcoral/tools.2"
297
+ elif '15x_first_seg' in seg_type:
298
+ partition_with_profiling_dir = "libcoral/tools.15"
299
+ elif '133x_first_seg' in seg_type:
300
+ partition_with_profiling_dir = "libcoral/tools.133"
301
+ elif '166x_first_seg' in seg_type:
302
+ partition_with_profiling_dir = "libcoral/tools.166"
303
+ elif '3x_first_seg' in seg_type:
304
+ partition_with_profiling_dir = "libcoral/tools.3"
305
+ elif '4x_first_seg' in seg_type:
306
+ partition_with_profiling_dir = "libcoral/tools.4"
307
+ elif '15x_last_seg' in seg_type:
308
+ partition_with_profiling_dir = "libcoral/tools.last15"
309
+ elif '2x_last_seg' in seg_type:
310
+ partition_with_profiling_dir = "libcoral/tools.last2"
311
+ elif '125x_last_inc_seg/' == seg_type:
312
+ partition_with_profiling_dir = "libcoral/tools.last125_inc_seg"
313
+ elif '2x_first_125x_last_inc_seg/' == seg_type:
314
+ partition_with_profiling_dir = "libcoral/tools.2last125_inc_seg"
315
+ elif 'inc_seg/' == seg_type:
316
+ partition_with_profiling_dir = "libcoral/tools.inc_seg"
317
+ else:
318
+ partition_with_profiling_dir = "libcoral/tools.orig"
319
+
320
+ cmd = [partition_with_profiling_dir+"/partitioner/partition_with_profiling","--output_dir",seg_dir+seg_type,"--edgetpu_compiler_binary",
321
+ "/usr/bin/edgetpu_compiler","--model_path",seg_dir+fn+".tflite","--num_segments",str(sn)]
322
+
323
+ try:
324
+ cmd += custom_args[fn][sn]
325
+ except:
326
+ pass
327
+
328
+ print(cmd)
329
+ subprocess.run(cmd)#'''
330
+
331
+
332
+ seg_types += ['133x_first_seg/', '166x_first_seg/', 'inc_seg/', '125x_last_inc_seg/', '2x_first_125x_last_inc_seg/']
333
+
334
# Test timings
#
# For every model in fn_list, benchmark each segmentation that exists on disk
# for 1..MAX_TPU_COUNT TPUs, then copy the chosen segment files into the
# current directory and record their MD5 checksums.
#
# NOTE(review): this section was reconstructed from a diff rendering that had
# lost its indentation; the loop nesting below follows the control flow the
# statements imply (e.g. the per-TPU-count report uses num_tpus). Confirm
# against the original file.

# Benchmark driver script; constant for the whole run.
exe_file = "/home/seth/CodeProject.AI-ObjectDetectionCoral/objectdetection_coral_multitpu.py"

# Patterns for the two figures read from the benchmark's stderr; compiled once.
ms_pattern = re.compile(r'threads; ([\d\.]+)ms ea')
mpps_pattern = re.compile(r'; ([\d\.]+) tensor MPx')

fin_timings = {}
fin_fnames = {}
for fn in fn_list:
    # A list entry groups several variants of one model; results for the whole
    # group are stored under the first name in the list.
    if isinstance(fn, list):
        fn_size_list = fn
        fn = fn[0]
    else:
        fn_size_list = [fn]

    # Not reset per TPU count: results from smaller TPU counts stay in the
    # ranking for larger ones.
    timings = []
    fin_timings[fn] = {}
    fin_fnames[fn] = {}

    for num_tpus in range(1, MAX_TPU_COUNT + 1):

        for this_fn in fn_size_list:
            for seg_type in seg_types:
                for sn in range(1, num_tpus + 1):
                    # No need to run many slow single TPU tests, just one
                    if sn == 1 and seg_type != '':
                        continue

                    # Get file types
                    seg_list, file_missing = seg_exists(this_fn, seg_type, sn)

                    if file_missing:
                        continue

                    cmd = ["python3.9", exe_file, "--model"] + \
                        seg_list + ["--labels", "coral/pycoral/test_data/coco_labels.txt", "--input", "/home/seth/coral/pycoral/test_data/grace_hopper.bmp",
                                    "--count", "4000", "--num-tpus", str(num_tpus)]
                    print(cmd)

                    # Clock runtime
                    #start_time = time.perf_counter()
                    #subprocess.run(cmd)
                    #ms_time = 1000 * (time.perf_counter() - start_time) / 4000  # ms * total time / iterations

                    # Last quarter runtime
                    try:
                        c = subprocess.run(cmd, check=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=3600*2)
                    except subprocess.TimeoutExpired:
                        print("Timed out!")
                        continue
                    except subprocess.CalledProcessError as err:
                        # One failing configuration should not abort the whole
                        # run; show what the child printed and move on.
                        print(f"Benchmark exited with status {err.returncode}; skipping")
                        print(err.stdout)
                        print(err.stderr)
                        continue
                    print(c.stdout)
                    print(c.stderr)

                    ms_match = ms_pattern.search(c.stderr)
                    mpps_match = mpps_pattern.search(c.stderr)
                    if ms_match is None or mpps_match is None:
                        # The run succeeded but did not print the expected
                        # summary line, so there is nothing to record.
                        print("Timing figures not found in benchmark output; skipping")
                        continue
                    ms_time = float(ms_match.group(1))
                    mpps_time = float(mpps_match.group(1))

                    timings.append((ms_time, num_tpus, this_fn, seg_type, sn, mpps_time))
                    subprocess.run(['uptime'])

        # Rank by tensor MPx/sec (tuple index 5), fastest first.
        timings = sorted(timings, key=lambda entry: entry[5], reverse=True)
        if not timings:
            continue

        # Print the top ten
        print(f"TIMINGS FOR {num_tpus} TPUs AND {fn} MODEL:")
        for entry in timings[:10]:
            print(entry)

        # Get best segments: the highest-ranked entry whose segment type is
        # not 'orig_code'.
        candidates = [entry for entry in timings if entry[3] != 'orig_code']
        if not candidates:
            continue
        best = candidates[0]
        fin_timings[fn][num_tpus] = timings[0]

        # Add segment to the final list
        # Copy best to local dir
        seg_list, _ = seg_exists(best[2], best[3], best[4])
        fin_fnames[fn][num_tpus] = []
        for s in seg_list:
            # Output name is "<parent directory>_<file name>".
            file_components = os.path.normpath(s).split("/")
            out_fname = file_components[-2] + "_" + file_components[-1]
            shutil.copyfile(s, out_fname)
            with open(out_fname, 'rb') as seg_file:
                checksum = hashlib.md5(seg_file.read()).hexdigest()
            fin_fnames[fn][num_tpus].append((out_fname, checksum))

        # Create archive for this model / TPU count
        #if len(fin_fnames[fn][num_tpus]) > 1 or num_tpus == 1:
        #    zip_name = f'objectdetection-{fn}-{num_tpus}-edgetpu.zip'
        #    cmd = ['zip', '-9', zip_name] + fin_fnames[fn][num_tpus]
        #    print(cmd)
        #    if os.path.exists(zip_name):
        #        os.unlink(zip_name)
        #    subprocess.run(cmd)

print(fin_timings)
print(fin_fnames)
428
+
429
# Pretty print all of the segments we've timed and selected.
#
# For each model: one "#"-prefixed line per TPU count with its timing figures,
# then the selected multi-segment file lists, then the single-TPU entry.
for model_name, chosen in fin_fnames.items():
    print(" '%s': {" % model_name)

    for n_tpus, best in fin_timings[model_name].items():
        seg_str = f"{len(chosen[n_tpus])} segments" if n_tpus in chosen else "1 segment "

        ms_per_inference = best[0]
        fps = 1000.0 / ms_per_inference

        print(f"#{ms_per_inference:6.1f} ms/inference ({fps:5.1f} FPS;{best[5]:5.1f} tensor MPx/sec) for {n_tpus} TPUs using {seg_str}: {best[2]}")

    # Only configurations with more than one segment file get listed here.
    for n_tpus, out_fnames in chosen.items():
        if len(out_fnames) <= 1:
            continue
        print(f"{n_tpus}: {out_fnames},")

    if 1 in chosen:
        single_tpu_entry = chosen[1][0]
        print(f" '_tflite': '{single_tpu_entry}'")
    print(" },")