Dr. Richard Zinck
committed on
Commit b87f798
Parent(s): 5788d0e
Basic files
- LICENSE +360 -0
- audio.py +118 -0
- bat_gui.py +676 -0
- bat_ident.py +616 -0
- config.py +257 -0
- model.py +389 -0
- requirements.txt +12 -0
- segments.py +305 -0
LICENSE
ADDED
@@ -0,0 +1,360 @@
Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International
Public License

By exercising the Licensed Rights (defined below), You accept and agree
to be bound by the terms and conditions of this Creative Commons
Attribution-NonCommercial-ShareAlike 4.0 International Public License
("Public License"). To the extent this Public License may be
interpreted as a contract, You are granted the Licensed Rights in
consideration of Your acceptance of these terms and conditions, and the
Licensor grants You such rights in consideration of benefits the
Licensor receives from making the Licensed Material available under
these terms and conditions.


Section 1 -- Definitions.

  a. Adapted Material means material subject to Copyright and Similar
     Rights that is derived from or based upon the Licensed Material
     and in which the Licensed Material is translated, altered,
     arranged, transformed, or otherwise modified in a manner requiring
     permission under the Copyright and Similar Rights held by the
     Licensor. For purposes of this Public License, where the Licensed
     Material is a musical work, performance, or sound recording,
     Adapted Material is always produced where the Licensed Material is
     synched in timed relation with a moving image.

  b. Adapter's License means the license You apply to Your Copyright
     and Similar Rights in Your contributions to Adapted Material in
     accordance with the terms and conditions of this Public License.

  c. BY-NC-SA Compatible License means a license listed at
     creativecommons.org/compatiblelicenses, approved by Creative
     Commons as essentially the equivalent of this Public License.

  d. Copyright and Similar Rights means copyright and/or similar rights
     closely related to copyright including, without limitation,
     performance, broadcast, sound recording, and Sui Generis Database
     Rights, without regard to how the rights are labeled or
     categorized. For purposes of this Public License, the rights
     specified in Section 2(b)(1)-(2) are not Copyright and Similar
     Rights.

  e. Effective Technological Measures means those measures that, in the
     absence of proper authority, may not be circumvented under laws
     fulfilling obligations under Article 11 of the WIPO Copyright
     Treaty adopted on December 20, 1996, and/or similar international
     agreements.

  f. Exceptions and Limitations means fair use, fair dealing, and/or
     any other exception or limitation to Copyright and Similar Rights
     that applies to Your use of the Licensed Material.

  g. License Elements means the license attributes listed in the name
     of a Creative Commons Public License. The License Elements of this
     Public License are Attribution, NonCommercial, and ShareAlike.

  h. Licensed Material means the artistic or literary work, database,
     or other material to which the Licensor applied this Public
     License.

  i. Licensed Rights means the rights granted to You subject to the
     terms and conditions of this Public License, which are limited to
     all Copyright and Similar Rights that apply to Your use of the
     Licensed Material and that the Licensor has authority to license.

  j. Licensor means the individual(s) or entity(ies) granting rights
     under this Public License.

  k. NonCommercial means not primarily intended for or directed towards
     commercial advantage or monetary compensation. For purposes of
     this Public License, the exchange of the Licensed Material for
     other material subject to Copyright and Similar Rights by digital
     file-sharing or similar means is NonCommercial provided there is
     no payment of monetary compensation in connection with the
     exchange.

  l. Share means to provide material to the public by any means or
     process that requires permission under the Licensed Rights, such
     as reproduction, public display, public performance, distribution,
     dissemination, communication, or importation, and to make material
     available to the public including in ways that members of the
     public may access the material from a place and at a time
     individually chosen by them.

  m. Sui Generis Database Rights means rights other than copyright
     resulting from Directive 96/9/EC of the European Parliament and of
     the Council of 11 March 1996 on the legal protection of databases,
     as amended and/or succeeded, as well as other essentially
     equivalent rights anywhere in the world.

  n. You means the individual or entity exercising the Licensed Rights
     under this Public License. Your has a corresponding meaning.


Section 2 -- Scope.

  a. License grant.

       1. Subject to the terms and conditions of this Public License,
          the Licensor hereby grants You a worldwide, royalty-free,
          non-sublicensable, non-exclusive, irrevocable license to
          exercise the Licensed Rights in the Licensed Material to:

            a. reproduce and Share the Licensed Material, in whole or
               in part, for NonCommercial purposes only; and

            b. produce, reproduce, and Share Adapted Material for
               NonCommercial purposes only.

       2. Exceptions and Limitations. For the avoidance of doubt, where
          Exceptions and Limitations apply to Your use, this Public
          License does not apply, and You do not need to comply with
          its terms and conditions.

       3. Term. The term of this Public License is specified in Section
          6(a).

       4. Media and formats; technical modifications allowed. The
          Licensor authorizes You to exercise the Licensed Rights in
          all media and formats whether now known or hereafter created,
          and to make technical modifications necessary to do so. The
          Licensor waives and/or agrees not to assert any right or
          authority to forbid You from making technical modifications
          necessary to exercise the Licensed Rights, including
          technical modifications necessary to circumvent Effective
          Technological Measures. For purposes of this Public License,
          simply making modifications authorized by this Section 2(a)
          (4) never produces Adapted Material.

       5. Downstream recipients.

            a. Offer from the Licensor -- Licensed Material. Every
               recipient of the Licensed Material automatically
               receives an offer from the Licensor to exercise the
               Licensed Rights under the terms and conditions of this
               Public License.

            b. Additional offer from the Licensor -- Adapted Material.
               Every recipient of Adapted Material from You
               automatically receives an offer from the Licensor to
               exercise the Licensed Rights in the Adapted Material
               under the conditions of the Adapter's License You apply.

            c. No downstream restrictions. You may not offer or impose
               any additional or different terms or conditions on, or
               apply any Effective Technological Measures to, the
               Licensed Material if doing so restricts exercise of the
               Licensed Rights by any recipient of the Licensed
               Material.

       6. No endorsement. Nothing in this Public License constitutes or
          may be construed as permission to assert or imply that You
          are, or that Your use of the Licensed Material is, connected
          with, or sponsored, endorsed, or granted official status by,
          the Licensor or others designated to receive attribution as
          provided in Section 3(a)(1)(A)(i).

  b. Other rights.

       1. Moral rights, such as the right of integrity, are not
          licensed under this Public License, nor are publicity,
          privacy, and/or other similar personality rights; however, to
          the extent possible, the Licensor waives and/or agrees not to
          assert any such rights held by the Licensor to the limited
          extent necessary to allow You to exercise the Licensed
          Rights, but not otherwise.

       2. Patent and trademark rights are not licensed under this
          Public License.

       3. To the extent possible, the Licensor waives any right to
          collect royalties from You for the exercise of the Licensed
          Rights, whether directly or through a collecting society
          under any voluntary or waivable statutory or compulsory
          licensing scheme. In all other cases the Licensor expressly
          reserves any right to collect such royalties, including when
          the Licensed Material is used other than for NonCommercial
          purposes.


Section 3 -- License Conditions.

Your exercise of the Licensed Rights is expressly made subject to the
following conditions.

  a. Attribution.

       1. If You Share the Licensed Material (including in modified
          form), You must:

            a. retain the following if it is supplied by the Licensor
               with the Licensed Material:

                 i. identification of the creator(s) of the Licensed
                    Material and any others designated to receive
                    attribution, in any reasonable manner requested by
                    the Licensor (including by pseudonym if
                    designated);

                ii. a copyright notice;

               iii. a notice that refers to this Public License;

                iv. a notice that refers to the disclaimer of
                    warranties;

                 v. a URI or hyperlink to the Licensed Material to the
                    extent reasonably practicable;

            b. indicate if You modified the Licensed Material and
               retain an indication of any previous modifications; and

            c. indicate the Licensed Material is licensed under this
               Public License, and include the text of, or the URI or
               hyperlink to, this Public License.

       2. You may satisfy the conditions in Section 3(a)(1) in any
          reasonable manner based on the medium, means, and context in
          which You Share the Licensed Material. For example, it may be
          reasonable to satisfy the conditions by providing a URI or
          hyperlink to a resource that includes the required
          information.
       3. If requested by the Licensor, You must remove any of the
          information required by Section 3(a)(1)(A) to the extent
          reasonably practicable.

  b. ShareAlike.

     In addition to the conditions in Section 3(a), if You Share
     Adapted Material You produce, the following conditions also apply.

       1. The Adapter's License You apply must be a Creative Commons
          license with the same License Elements, this version or
          later, or a BY-NC-SA Compatible License.

       2. You must include the text of, or the URI or hyperlink to, the
          Adapter's License You apply. You may satisfy this condition
          in any reasonable manner based on the medium, means, and
          context in which You Share Adapted Material.

       3. You may not offer or impose any additional or different terms
          or conditions on, or apply any Effective Technological
          Measures to, Adapted Material that restrict exercise of the
          rights granted under the Adapter's License You apply.


Section 4 -- Sui Generis Database Rights.

Where the Licensed Rights include Sui Generis Database Rights that
apply to Your use of the Licensed Material:

  a. for the avoidance of doubt, Section 2(a)(1) grants You the right
     to extract, reuse, reproduce, and Share all or a substantial
     portion of the contents of the database for NonCommercial purposes
     only;

  b. if You include all or a substantial portion of the database
     contents in a database in which You have Sui Generis Database
     Rights, then the database in which You have Sui Generis Database
     Rights (but not its individual contents) is Adapted Material,
     including for purposes of Section 3(b); and

  c. You must comply with the conditions in Section 3(a) if You Share
     all or a substantial portion of the contents of the database.

For the avoidance of doubt, this Section 4 supplements and does not
replace Your obligations under this Public License where the Licensed
Rights include other Copyright and Similar Rights.


Section 5 -- Disclaimer of Warranties and Limitation of Liability.

  a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
     EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
     AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
     ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
     IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
     WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
     PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
     ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
     KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
     ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.

  b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
     TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
     NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
     INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
     COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
     USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
     ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
     DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
     IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.

  c. The disclaimer of warranties and limitation of liability provided
     above shall be interpreted in a manner that, to the extent
     possible, most closely approximates an absolute disclaimer and
     waiver of all liability.


Section 6 -- Term and Termination.

  a. This Public License applies for the term of the Copyright and
     Similar Rights licensed here. However, if You fail to comply with
     this Public License, then Your rights under this Public License
     terminate automatically.

  b. Where Your right to use the Licensed Material has terminated under
     Section 6(a), it reinstates:

       1. automatically as of the date the violation is cured, provided
          it is cured within 30 days of Your discovery of the
          violation; or

       2. upon express reinstatement by the Licensor.

     For the avoidance of doubt, this Section 6(b) does not affect any
     right the Licensor may have to seek remedies for Your violations
     of this Public License.

  c. For the avoidance of doubt, the Licensor may also offer the
     Licensed Material under separate terms or conditions or stop
     distributing the Licensed Material at any time; however, doing so
     will not terminate this Public License.

  d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
     License.


Section 7 -- Other Terms and Conditions.

  a. The Licensor shall not be bound by any additional or different
     terms or conditions communicated by You unless expressly agreed.

  b. Any arrangements, understandings, or agreements regarding the
     Licensed Material not stated herein are separate from and
     independent of the terms and conditions of this Public License.


Section 8 -- Interpretation.

  a. For the avoidance of doubt, this Public License does not, and
     shall not be interpreted to, reduce, limit, restrict, or impose
     conditions on any use of the Licensed Material that could lawfully
     be made without permission under this Public License.

  b. To the extent possible, if any provision of this Public License is
     deemed unenforceable, it shall be automatically reformed to the
     minimum extent necessary to make it enforceable. If the provision
     cannot be reformed, it shall be severed from this Public License
     without affecting the enforceability of the remaining terms and
     conditions.

  c. No term or condition of this Public License will be waived and no
     failure to comply consented to unless expressly agreed to by the
     Licensor.

  d. Nothing in this Public License constitutes or may be interpreted
     as a limitation upon, or waiver of, any privileges and immunities
     that apply to the Licensor or You, including from the legal
     processes of any jurisdiction or authority.
audio.py
ADDED
@@ -0,0 +1,118 @@
"""Module containing audio helper functions.
"""
import numpy as np

import config as cfg

RANDOM = np.random.RandomState(cfg.RANDOM_SEED)


def openAudioFile(path: str, sample_rate=cfg.SAMPLE_RATE, offset=0.0, duration=None):
    """Open an audio file.

    Opens an audio file with librosa and the given settings.

    Args:
        path: Path to the audio file.
        sample_rate: The sample rate at which the file should be processed.
        offset: The starting offset.
        duration: Maximum duration of the loaded content.

    Returns:
        Returns the audio time series and the sampling rate.
    """
    # Open file with librosa (uses ffmpeg or libav)
    import librosa

    sig, rate = librosa.load(path, sr=sample_rate, offset=offset, duration=duration, mono=True, res_type="kaiser_fast")

    return sig, rate


def saveSignal(sig, fname: str):
    """Saves a signal to file.

    Args:
        sig: The signal to be saved.
        fname: The file path.
    """
    import soundfile as sf

    sf.write(fname, sig, cfg.SAMPLE_RATE, "PCM_16")


def noise(sig, shape, amount=None):
    """Creates noise.

    Creates a noise vector with the given shape.

    Args:
        sig: The original audio signal.
        shape: Shape of the noise.
        amount: The noise intensity.

    Returns:
        A numpy array of noise with the given shape.
    """
    # Random noise intensity
    if amount is None:
        amount = RANDOM.uniform(0.1, 0.5)

    # Create Gaussian noise
    try:
        noise = RANDOM.normal(min(sig) * amount, max(sig) * amount, shape)
    except Exception:
        noise = np.zeros(shape)

    return noise.astype("float32")


def splitSignal(sig, rate, seconds, overlap, minlen):
    """Split signal with overlap.

    Args:
        sig: The original signal to be split.
        rate: The sampling rate.
        seconds: The duration of a segment.
        overlap: The overlapping seconds of segments.
        minlen: Minimum length of a split.

    Returns:
        A list of splits.
    """
    sig_splits = []

    for i in range(0, len(sig), int((seconds - overlap) * rate)):
        split = sig[i : i + int(seconds * rate)]

        # End of signal?
        if len(split) < int(minlen * rate):
            break

        # Signal chunk too short?
        if len(split) < int(rate * seconds):
            split = np.hstack((split, noise(split, (int(rate * seconds) - len(split)), 0.5)))

        sig_splits.append(split)

    return sig_splits


def cropCenter(sig, rate, seconds):
    """Crop signal to center.

    Args:
        sig: The original signal.
        rate: The sampling rate.
        seconds: The length of the signal.
    """
    if len(sig) > int(seconds * rate):
        start = int((len(sig) - int(seconds * rate)) / 2)
        end = start + int(seconds * rate)
        sig = sig[start:end]

    # Pad with noise
    elif len(sig) < int(seconds * rate):
        sig = np.hstack((sig, noise(sig, (int(seconds * rate) - len(sig)), 0.5)))

    return sig
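
For orientation, a minimal usage sketch of how these helpers chain together. The file name, 144 kHz sample rate, and chunk parameters below are illustrative assumptions, not values fixed by this commit:

import audio

# Load a recording; librosa resamples to the requested rate.
sig, rate = audio.openAudioFile("example.wav", sample_rate=144000)

# Cut it into 3-second chunks that overlap by 1 second; a trailing chunk
# shorter than 1 second is dropped, shorter-than-3s chunks are noise-padded.
chunks = audio.splitSignal(sig, rate, seconds=3.0, overlap=1.0, minlen=1.0)

for i, chunk in enumerate(chunks):
    audio.saveSignal(chunk, f"chunk_{i}.wav")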
bat_gui.py
ADDED
@@ -0,0 +1,676 @@
import concurrent.futures
import os
import sys
from multiprocessing import freeze_support
import gradio as gr
import webview
import bat_ident
import config as cfg
import segments
import utils
import logging
import librosa
logging.basicConfig(filename='bat_gui.log', encoding='utf-8', level=logging.DEBUG)

_WINDOW: webview.Window


_AREA_ONE = "EU"
_AREA_TWO = "Bavaria"
_AREA_THREE = "USA"
_AREA_FOUR = "Scotland"
_AREA_FIVE = "UK"

#
# MODEL part mixed with CONTROLLER
#
OUTPUT_TYPE_MAP = {"Raven selection table": "table", "Audacity": "audacity", "R": "r", "CSV": "csv"}
ORIGINAL_MODEL_PATH = cfg.MODEL_PATH
ORIGINAL_MDATA_MODEL_PATH = cfg.MDATA_MODEL_PATH
ORIGINAL_LABELS_FILE = cfg.LABELS_FILE
ORIGINAL_TRANSLATED_LABELS_PATH = cfg.TRANSLATED_BAT_LABELS_PATH  # cfg.TRANSLATED_LABELS_PATH


def analyzeFile_wrapper(entry):
    # return (entry[0], analyze.analyzeFile(entry))
    return (entry[0], bat_ident.analyze_file(entry))


def validate(value, msg):
    """Checks if the value is not falsy.

    If the value is falsy, an error will be raised.

    Args:
        value: Value to be tested.
        msg: Message in case of an error.
    """
    if not value:
        raise gr.Error(msg)


def runBatchAnalysis(
    output_path,
    confidence,
    sensitivity,
    overlap,
    species_list_choice,
    locale,
    batch_size,
    threads,
    input_dir,
    output_type_radio,
    progress=gr.Progress(),
):
    validate(input_dir, "Please select a directory.")
    batch_size = int(batch_size)
    threads = int(threads)

    return runAnalysis(
        species_list_choice,
        None,
        output_path,
        confidence,
        sensitivity,
        overlap,
        output_type_radio,
        "en" if not locale else locale,
        batch_size,
        threads,
        input_dir,
        progress,
    )


def runSingleFileAnalysis(input_path,
                          confidence,
                          sensitivity,
                          overlap,
                          species_list_choice,
                          locale):
    validate(input_path, "Please select a file.")
    logging.info('first level')
    return runAnalysis(
        species_list_choice,
        input_path,
        None,
        confidence,
        sensitivity,
        overlap,
        "csv",
        "en" if not locale else locale,
        1,
        4,
        None,
        progress=None,
    )


def runAnalysis(
    species_list_choice: str,
    input_path: str,
    output_path: str | None,
    confidence: float,
    sensitivity: float,
    overlap: float,
    output_type: str,
    locale: str,
    batch_size: int,
    threads: int,
    input_dir: str,
    progress: gr.Progress | None,
):
    """Starts the analysis.

    Args:
        species_list_choice: The choice for the species list.
        input_path: Either a file or directory.
        output_path: The output path for the result; if None, the input_path is used.
        confidence: The selected minimum confidence.
        sensitivity: The selected sensitivity.
        overlap: The selected segment overlap.
        output_type: The type of result to be generated.
        locale: The translation to be used.
        batch_size: The number of samples in a batch.
        threads: The number of threads to be used.
        input_dir: The input directory.
        progress: The gradio progress bar.
    """
    logging.info('second level')
    if progress is not None:
        progress(0, desc="Preparing ...")
    # locale = locale.lower()
    # Load eBird codes, labels
    # cfg.CODES = analyze.loadCodes()
    # cfg.LABELS = utils.readLines(ORIGINAL_LABELS_FILE)
    cfg.LATITUDE, cfg.LONGITUDE, cfg.WEEK = -1, -1, -1
    cfg.LOCATION_FILTER_THRESHOLD = 0.03
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    cfg.BAT_CLASSIFIER_LOCATION = os.path.join(script_dir, cfg.BAT_CLASSIFIER_LOCATION)

    if species_list_choice == "Bavaria":
        cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Bavaria-144kHz.tflite"
        cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Bavaria-144kHz_Labels.txt"
        cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
        cfg.LATITUDE = -1
        cfg.LONGITUDE = -1
        cfg.SPECIES_LIST_FILE = None
        cfg.SPECIES_LIST = []
        locale = "de"

    elif species_list_choice == "EU":
        cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-EU-144kHz.tflite"
        cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-EU-144kHz_Labels.txt"
        cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
        cfg.LATITUDE = -1
        cfg.LONGITUDE = -1
        cfg.SPECIES_LIST_FILE = None
        cfg.SPECIES_LIST = []
        locale = "en"

    elif species_list_choice == "Scotland":
        cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Scotland-144kHz.tflite"
        cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Scotland-144kHz_Labels.txt"
        cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
        cfg.LATITUDE = -1
        cfg.LONGITUDE = -1
        cfg.SPECIES_LIST_FILE = None
        cfg.SPECIES_LIST = []
        locale = "en"

    elif species_list_choice == "UK":
        cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-UK-144kHz.tflite"
        cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-UK-144kHz_Labels.txt"
        cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
        cfg.LATITUDE = -1
        cfg.LONGITUDE = -1
        cfg.SPECIES_LIST_FILE = None
        cfg.SPECIES_LIST = []
        locale = "en"

    elif species_list_choice == "USA":
        cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-USA-144kHz.tflite"
        cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-USA-144kHz_Labels.txt"
        cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
        cfg.LATITUDE = -1
        cfg.LONGITUDE = -1
        cfg.SPECIES_LIST_FILE = None
        cfg.SPECIES_LIST = []
        locale = "en"

    else:
        cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-EU-144kHz.tflite"
        cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-EU-144kHz_Labels.txt"
        cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
        cfg.LATITUDE = -1
        cfg.LONGITUDE = -1
        cfg.SPECIES_LIST_FILE = None
        cfg.SPECIES_LIST = []
        locale = "en"

    # Load translated labels
    lfile = os.path.join(cfg.TRANSLATED_BAT_LABELS_PATH,
                         os.path.basename(cfg.LABELS_FILE).replace(".txt", f"_{locale}.txt"))
    if locale not in ["en"] and os.path.isfile(lfile):
        cfg.TRANSLATED_LABELS = utils.readLines(lfile)
    else:
        cfg.TRANSLATED_LABELS = cfg.LABELS

    if len(cfg.SPECIES_LIST) == 0:
        print(f"Species list contains {len(cfg.LABELS)} species")
    else:
        print(f"Species list contains {len(cfg.SPECIES_LIST)} species")

    cfg.INPUT_PATH = input_path

    if input_dir:
        cfg.OUTPUT_PATH = output_path if output_path else input_dir
    else:
        cfg.OUTPUT_PATH = output_path if output_path else input_path.split(".", 1)[0] + ".csv"

    # Parse input files
    if input_dir:
        cfg.FILE_LIST = utils.collect_audio_files(input_dir)
        cfg.INPUT_PATH = input_dir
    elif os.path.isdir(cfg.INPUT_PATH):
        cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
    else:
        cfg.FILE_LIST = [cfg.INPUT_PATH]

    validate(cfg.FILE_LIST, "No audio files found.")
    cfg.MIN_CONFIDENCE = confidence
    cfg.SIGMOID_SENSITIVITY = sensitivity
    cfg.SIG_OVERLAP = overlap

    # Set result type
    cfg.RESULT_TYPE = OUTPUT_TYPE_MAP[output_type] if output_type in OUTPUT_TYPE_MAP else output_type.lower()

    if cfg.RESULT_TYPE not in ["table", "audacity", "r", "csv"]:
        cfg.RESULT_TYPE = "table"

    # Set number of threads
    if input_dir:
        cfg.CPU_THREADS = max(1, int(threads))
        cfg.TFLITE_THREADS = 1
    else:
        cfg.CPU_THREADS = 1
        cfg.TFLITE_THREADS = max(1, int(threads))

    # Set batch size
    cfg.BATCH_SIZE = max(1, int(batch_size))
    flist = []

    for f in cfg.FILE_LIST:
        flist.append((f, cfg.get_config()))

    result_list = []

    if progress is not None:
        progress(0, desc="Starting ...")

    # Analyze files
    if cfg.CPU_THREADS < 2:
        for entry in flist:
            result = analyzeFile_wrapper(entry)
            result_list.append(result)
    else:
        with concurrent.futures.ProcessPoolExecutor(max_workers=cfg.CPU_THREADS) as executor:
            futures = (executor.submit(analyzeFile_wrapper, arg) for arg in flist)

            for i, f in enumerate(concurrent.futures.as_completed(futures), start=1):
                if progress is not None:
                    progress((i, len(flist)), total=len(flist), unit="files")
                result = f.result()
                result_list.append(result)

    return [[os.path.relpath(r[0], input_dir), r[1]] for r in result_list] if input_dir else cfg.OUTPUT_PATH


def extractSegments_wrapper(entry):
    return (entry[0][0], segments.extractSegments(entry))


def extract_segments(audio_dir, result_dir, output_dir, min_conf, num_seq, seq_length, threads, progress=gr.Progress()):
    validate(audio_dir, "No audio directory selected")

    if not result_dir:
        result_dir = audio_dir

    if not output_dir:
        output_dir = audio_dir

    if progress is not None:
        progress(0, desc="Searching files ...")

    # Parse audio and result folders
    cfg.FILE_LIST = segments.parseFolders(audio_dir, result_dir)

    # Set output folder
    cfg.OUTPUT_PATH = output_dir

    # Set number of threads
    cfg.CPU_THREADS = int(threads)

    # Set confidence threshold
    cfg.MIN_CONFIDENCE = max(0.01, min(0.99, min_conf))

    # Parse file list and make list of segments
    cfg.FILE_LIST = segments.parseFiles(cfg.FILE_LIST, max(1, int(num_seq)))

    # Add config items to each file list entry.
    # We have to do this for Windows which does not
    # support fork() and thus each process has to
    # have its own config. USE LINUX!
    flist = [(entry, max(cfg.SIG_LENGTH, float(seq_length)), cfg.get_config()) for entry in cfg.FILE_LIST]

    result_list = []

    # Extract segments
    if cfg.CPU_THREADS < 2:
        for i, entry in enumerate(flist):
            result = extractSegments_wrapper(entry)
            result_list.append(result)

            if progress is not None:
                progress((i, len(flist)), total=len(flist), unit="files")
    else:
        with concurrent.futures.ProcessPoolExecutor(max_workers=cfg.CPU_THREADS) as executor:
            futures = (executor.submit(extractSegments_wrapper, arg) for arg in flist)
            for i, f in enumerate(concurrent.futures.as_completed(futures), start=1):
                if progress is not None:
                    progress((i, len(flist)), total=len(flist), unit="files")
                result = f.result()

                result_list.append(result)

    return [[os.path.relpath(r[0], audio_dir), r[1]] for r in result_list]


def select_file(filetypes=()):
    """Creates a file selection dialog.

    Args:
        filetypes: List of filetypes to be filtered in the dialog.

    Returns:
        The selected file or None if the dialog was canceled.
    """
    files = _WINDOW.create_file_dialog(webview.OPEN_DIALOG, file_types=filetypes)
    return files[0] if files else None


def format_seconds(secs: float):
    """Formats a number of seconds into a string.

    Formats the seconds into the format "h:mm:ss.ms".

    Args:
        secs: Number of seconds.

    Returns:
        A string with the formatted seconds.
    """
    hours, secs = divmod(secs, 3600)
    minutes, secs = divmod(secs, 60)

    return "{:2.0f}:{:02.0f}:{:06.3f}".format(hours, minutes, secs)


def select_directory(collect_files=True):
    """Shows a directory selection system dialog.

    Uses pywebview to create a system dialog.

    Args:
        collect_files: If True, also lists all files inside the directory.

    Returns:
        If collect_files==True, returns (directory path, list of (relative file path, audio length)),
        else just the directory path.
        All values will be None if the dialog is cancelled.
    """
    dir_name = _WINDOW.create_file_dialog(webview.FOLDER_DIALOG)

    if collect_files:
        if not dir_name:
            return None, None

        files = utils.collect_audio_files(dir_name[0])

        return dir_name[0], [
            [os.path.relpath(file, dir_name[0]), format_seconds(librosa.get_duration(filename=file))] for file in files
        ]

    return dir_name[0] if dir_name else None


def show_species_choice(choice: str):
    """Sets the visibility of the species list choices.

    Args:
        choice: The label of the currently active choice.

    Returns:
        A list of [
            Row update,
            File update,
            Column update,
            Column update,
        ]
    """
    return [
        gr.Row.update(visible=True),
        gr.File.update(visible=False),
        gr.Column.update(visible=False),
        gr.Column.update(visible=False),
    ]


#
# VIEW - This is where the UI elements are defined
#

def sample_sliders(opened=True):
    """Creates the gradio accordion for the inference settings.

    Args:
        opened: If True the accordion is open on init.

    Returns:
        A tuple with the created elements:
        (Slider (min confidence), Slider (sensitivity), Slider (overlap))
    """
    with gr.Accordion("Inference settings", open=opened):
        with gr.Row():
            confidence_slider = gr.Slider(
                minimum=0, maximum=1, value=0.5, step=0.01, label="Minimum Confidence", info="Minimum confidence threshold."
            )
            sensitivity_slider = gr.Slider(
                minimum=0.5,
                maximum=1.5,
                value=1,
                step=0.01,
                label="Sensitivity",
                info="Detection sensitivity; higher values result in higher sensitivity.",
            )
            overlap_slider = gr.Slider(
                minimum=0, maximum=2.99, value=0, step=0.01, label="Overlap", info="Overlap of prediction segments."
            )

    return confidence_slider, sensitivity_slider, overlap_slider


def locale():
    """Creates the gradio elements for locale selection.

    Reads the translated labels inside the checkpoints directory.

    Returns:
        The dropdown element.
    """
    label_files = os.listdir(os.path.join(os.path.dirname(sys.argv[0]), ORIGINAL_TRANSLATED_LABELS_PATH))
    options = ["EN"] + [label_file.rsplit("_", 1)[-1].split(".")[0].upper() for label_file in label_files]

    return gr.Dropdown(options, value="EN", label="Locale", info="Locale for the translated species common names.", visible=False)


def species_lists(opened=True):
    """Creates the gradio accordion for species selection.

    Args:
        opened: If True the accordion is open on init.

    Returns:
        The radio element for the region choice.
    """
    with gr.Accordion("Area selection", open=opened):
        with gr.Row():
            species_list_radio = gr.Radio(
                [_AREA_ONE, _AREA_TWO, _AREA_THREE, _AREA_FOUR, _AREA_FIVE],
                value="All regions",
                label="Regions list",
                info="List of all possible regions",
                elem_classes="d-block",
            )
            # species_list_radio.change(
            #     show_species_choice,
            #     inputs=[species_list_radio],
            #     outputs=[ ],
            #     show_progress=False,
            # )
    return species_list_radio


#
# Design main frame for analysis of a single file
#
def build_single_analysis_tab():
    with gr.Tab("Single file"):
        audio_input = gr.Audio(type="filepath", label="file", elem_id="single_file_audio")
        confidence_slider, sensitivity_slider, overlap_slider = sample_sliders(False)
        species_list_radio = species_lists(False)
        locale_radio = locale()

        inputs = [
            audio_input,
            confidence_slider,
            sensitivity_slider,
            overlap_slider,
            species_list_radio,
            locale_radio,
        ]

        output_dataframe = gr.Dataframe(
            type="pandas",
            headers=["Start (s)", "End (s)", "Scientific name", "Common name", "Confidence"],
            elem_classes="mh-200",
        )
        single_file_analyze = gr.Button("Analyze")
        single_file_analyze.click(runSingleFileAnalysis,
                                  inputs=inputs,
                                  outputs=output_dataframe,
                                  )


def build_multi_analysis_tab():
    with gr.Tab("Multiple files"):
        input_directory_state = gr.State()
        output_directory_predict_state = gr.State()
        with gr.Row():
            with gr.Column():
                select_directory_btn = gr.Button("Select directory (recursive)")
                directory_input = gr.Matrix(interactive=False, elem_classes="mh-200", headers=["Subpath", "Length"])

                def select_directory_on_empty():
                    res = select_directory()

                    return res if res[1] else [res[0], [["No files found"]]]

                select_directory_btn.click(
                    select_directory_on_empty, outputs=[input_directory_state, directory_input], show_progress=True
                )

            with gr.Column():
                select_out_directory_btn = gr.Button("Select output directory.")
                selected_out_textbox = gr.Textbox(
                    label="Output directory",
                    interactive=False,
                    placeholder="If not selected, the input directory will be used.",
                )

                def select_directory_wrapper():
                    return (select_directory(collect_files=False),) * 2

                select_out_directory_btn.click(
                    select_directory_wrapper,
                    outputs=[output_directory_predict_state, selected_out_textbox],
                    show_progress=False,
                )

        confidence_slider, sensitivity_slider, overlap_slider = sample_sliders()
        species_list_radio = species_lists(False)

        output_type_radio = gr.Radio(
            list(OUTPUT_TYPE_MAP.keys()),
            value="Raven selection table",
            label="Result type",
            info="Specifies output format.",
        )

        with gr.Row():
            batch_size_number = gr.Number(
                precision=1, label="Batch size", value=1, info="Number of samples to process at the same time."
            )
            threads_number = gr.Number(precision=1, label="Threads", value=4, info="Number of CPU threads.")

        locale_radio = locale()

        start_batch_analysis_btn = gr.Button("Analyze")

        result_grid = gr.Matrix(headers=["File", "Execution"], elem_classes="mh-200")

        inputs = [
            output_directory_predict_state,
            confidence_slider,
            sensitivity_slider,
            overlap_slider,
            species_list_radio,
            locale_radio,
            batch_size_number,
            threads_number,
            input_directory_state,
            output_type_radio,
        ]

        start_batch_analysis_btn.click(runBatchAnalysis, inputs=inputs, outputs=result_grid)


def build_segments_tab():
    with gr.Tab("Segments"):
        audio_directory_state = gr.State()
        result_directory_state = gr.State()
        output_directory_state = gr.State()

        def select_directory_to_state_and_tb():
            return (select_directory(collect_files=False),) * 2

        with gr.Row():
            select_audio_directory_btn = gr.Button("Select audio directory (recursive)")
            selected_audio_directory_tb = gr.Textbox(show_label=False, interactive=False)
            select_audio_directory_btn.click(
                select_directory_to_state_and_tb,
                outputs=[selected_audio_directory_tb, audio_directory_state],
                show_progress=False,
            )

        with gr.Row():
            select_result_directory_btn = gr.Button("Select result directory")
            selected_result_directory_tb = gr.Textbox(
                show_label=False, interactive=False, placeholder="Same as audio directory if not selected"
            )
            select_result_directory_btn.click(
                select_directory_to_state_and_tb,
                outputs=[result_directory_state, selected_result_directory_tb],
                show_progress=False,
            )

        with gr.Row():
            select_output_directory_btn = gr.Button("Select output directory")
            selected_output_directory_tb = gr.Textbox(
                show_label=False, interactive=False, placeholder="Same as audio directory if not selected"
            )
            select_output_directory_btn.click(
                select_directory_to_state_and_tb,
                outputs=[selected_output_directory_tb, output_directory_state],
                show_progress=False,
            )

        min_conf_slider = gr.Slider(
            minimum=0.1, maximum=0.99, step=0.01, label="Minimum confidence", info="Minimum confidence threshold."
        )
        num_seq_number = gr.Number(
            100, label="Max number of segments", info="Maximum number of randomly extracted segments per species."
        )
        seq_length_number = gr.Number(3.0, label="Sequence length", info="Length of extracted segments in seconds.")
        threads_number = gr.Number(4, label="Threads", info="Number of CPU threads.")

        extract_segments_btn = gr.Button("Extract segments")

        result_grid = gr.Matrix(headers=["File", "Execution"], elem_classes="mh-200")

        extract_segments_btn.click(
            extract_segments,
            inputs=[
                audio_directory_state,
                result_directory_state,
                output_directory_state,
                min_conf_slider,
                num_seq_number,
                seq_length_number,
                threads_number,
            ],
            outputs=result_grid,
        )


if __name__ == "__main__":
    freeze_support()
    with gr.Blocks(
        css=r".d-block .wrap {display: block !important;} .mh-200 {max-height: 300px; overflow-y: auto !important;} footer {display: none !important;} #single_file_audio, #single_file_audio * {max-height: 81.6px; min-height: 0;}",
        theme=gr.themes.Default(),
        analytics_enabled=False,
    ) as demo:
        build_single_analysis_tab()
        build_multi_analysis_tab()
        build_segments_tab()

    url = demo.queue(api_open=False).launch(prevent_thread_lock=True, quiet=True)[1]
    # _WINDOW = webview.create_window("BattyBirdNET-Analyzer", url.rstrip("/") +
    #                                 "?__theme=light", min_size=(1024, 768))
    # webview.start(private_mode=False)
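
The per-entry config tuple built in runAnalysis and extract_segments is worth spelling out: Windows spawns worker processes instead of forking, so module-level settings in config are not inherited, and each work item therefore carries its own snapshot. A minimal self-contained sketch of that pattern follows; the worker function and the settings dictionaries are stand-ins for illustration, not this repo's API:

import concurrent.futures

def worker(entry):
    # Each entry carries its own settings snapshot, so spawned processes
    # (Windows has no fork()) still see the values chosen in the GUI.
    path, settings = entry
    return (path, settings["MIN_CONFIDENCE"])  # stand-in for bat_ident.analyze_file

if __name__ == "__main__":
    flist = [("a.wav", {"MIN_CONFIDENCE": 0.5}), ("b.wav", {"MIN_CONFIDENCE": 0.5})]

    with concurrent.futures.ProcessPoolExecutor(max_workers=2) as executor:
        futures = [executor.submit(worker, entry) for entry in flist]

        for future in concurrent.futures.as_completed(futures):
            print(future.result())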
bat_ident.py
ADDED
@@ -0,0 +1,616 @@
"""Module to analyze audio samples.
"""
import argparse
import datetime
import json
import operator
import os
import sys
from multiprocessing import Pool, freeze_support
import numpy as np
import audio
import config as cfg
import model
import species
import utils
import subprocess
import pathlib


def load_codes():
    """Loads the eBird codes.

    Returns:
        A dictionary containing the eBird codes.
    """
    with open(cfg.CODES_FILE, "r") as cfile:
        codes = json.load(cfile)
    return codes


def save_result_file(r: dict[str, list], path: str, afile_path: str):
    """Saves the results to the hard drive.

    Args:
        r: The dictionary with {segment: scores}.
        path: The path where the result should be saved.
        afile_path: The path to the audio file.
    """
    # Make folder if it doesn't exist
    if os.path.dirname(path):
        os.makedirs(os.path.dirname(path), exist_ok=True)

    # Selection table
    out_string = ""

    if cfg.RESULT_TYPE == "table":
        # Raven selection header
        header = "Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tSpecies Code\tCommon Name\tConfidence\n"
        selection_id = 0
        # Write header
        out_string += header

        # Extract valid predictions for every timestamp
        for timestamp in get_sorted_timestamps(r):
            rstring = ""
            start, end = timestamp.split("-", 1)

            for c in r[timestamp]:
                if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
                    selection_id += 1
                    label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
                    rstring += "{}\tSpectrogram 1\t1\t{}\t{}\t{}\t{}\t{:.4f}\n".format(
                        selection_id,
                        start,
                        end,
                        cfg.CODES[c[0]] if c[0] in cfg.CODES else c[0],
                        label.split("_", 1)[-1],
                        c[1],
                    )

            # Write result string to file
            out_string += rstring

    elif cfg.RESULT_TYPE == "audacity":
        # Audacity timeline labels
        for timestamp in get_sorted_timestamps(r):
            rstring = ""

            for c in r[timestamp]:
                if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
                    label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
                    rstring += "{}\t{}\t{:.4f}\n".format(timestamp.replace("-", "\t"), label.replace("_", ", "), c[1])

            # Write result string to file
            out_string += rstring

    elif cfg.RESULT_TYPE == "r":
        # Output format for R
        header = ("filepath,start,end,scientific_name,common_name,confidence,lat,lon,week,"
                  "overlap,sensitivity,min_conf,species_list,model")
        out_string += header

        for timestamp in get_sorted_timestamps(r):
            rstring = ""
            start, end = timestamp.split("-", 1)

            for c in r[timestamp]:
                if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
                    label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
                    rstring += "\n{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{},{},{},{}".format(
                        afile_path,
                        start,
                        end,
                        label.split("_", 1)[0],
                        label.split("_", 1)[-1],
                        c[1],
                        cfg.LATITUDE,
                        cfg.LONGITUDE,
                        cfg.WEEK,
                        cfg.SIG_OVERLAP,
                        (1.0 - cfg.SIGMOID_SENSITIVITY) + 1.0,
                        cfg.MIN_CONFIDENCE,
                        cfg.SPECIES_LIST_FILE,
                        os.path.basename(cfg.MODEL_PATH),
                    )

            # Write result string to file
            out_string += rstring

    elif cfg.RESULT_TYPE == "kaleidoscope":
        # Output format for Kaleidoscope
        header = ("INDIR,FOLDER,IN FILE,OFFSET,DURATION,scientific_name,"
                  "common_name,confidence,lat,lon,week,overlap,sensitivity")
        out_string += header

        folder_path, filename = os.path.split(afile_path)
        parent_folder, folder_name = os.path.split(folder_path)

        for timestamp in get_sorted_timestamps(r):
            rstring = ""
            start, end = timestamp.split("-", 1)

            for c in r[timestamp]:
                if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
                    label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
                    rstring += "\n{},{},{},{},{},{},{},{:.4f},{:.4f},{:.4f},{},{},{}".format(
                        parent_folder.rstrip("/"),
                        folder_name,
                        filename,
                        start,
                        float(end) - float(start),
                        label.split("_", 1)[0],
                        label.split("_", 1)[-1],
                        c[1],
                        cfg.LATITUDE,
                        cfg.LONGITUDE,
                        cfg.WEEK,
                        cfg.SIG_OVERLAP,
                        (1.0 - cfg.SIGMOID_SENSITIVITY) + 1.0,
                    )

            # Write result string to file
            out_string += rstring

    else:
        # CSV output file
        header = "Start (s),End (s),Scientific name,Common name,Confidence\n"
+
|
156 |
+
# Write header
|
157 |
+
out_string += header
|
158 |
+
|
159 |
+
for timestamp in get_sorted_timestamps(r):
|
160 |
+
rstring = ""
|
161 |
+
|
162 |
+
for c in r[timestamp]:
|
163 |
+
start, end = timestamp.split("-", 1)
|
164 |
+
|
165 |
+
if c[1] > cfg.MIN_CONFIDENCE and (not cfg.SPECIES_LIST or c[0] in cfg.SPECIES_LIST):
|
166 |
+
label = cfg.TRANSLATED_LABELS[cfg.LABELS.index(c[0])]
|
167 |
+
rstring += "{},{},{},{},{:.4f}\n".format(start, end, label.split("_", 1)[0],
|
168 |
+
label.split("_", 1)[-1], c[1])
|
169 |
+
|
170 |
+
# Write result string to file
|
171 |
+
out_string += rstring
|
172 |
+
|
173 |
+
# Save as file
|
174 |
+
with open(path, "w", encoding="utf-8") as rfile:
|
175 |
+
rfile.write(out_string)
|
176 |
+
return out_string
|
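As an illustration of the default CSV branch above, a sketch with made-up values, assuming the label string appears in both cfg.LABELS and cfg.TRANSLATED_LABELS:

# Hypothetical result dict as built by analyze_file(): {"start-end": [(label, score), ...]}
r = {"0.0-1.0": [("Pipistrellus pipistrellus_Common pipistrelle", 0.9312)]}
# With cfg.RESULT_TYPE = "csv" and cfg.MIN_CONFIDENCE = 0.7,
# save_result_file(r, "out.csv", "rec.wav") writes and returns:
#   Start (s),End (s),Scientific name,Common name,Confidence
#   0.0,1.0,Pipistrellus pipistrellus,Common pipistrelle,0.9312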
177 |
+
|
178 |
+
|
179 |
+
def get_sorted_timestamps(results: dict[str, list]):
|
180 |
+
"""Sorts the results based on the segments.
|
181 |
+
Args:
|
182 |
+
results: The dictionary with {segment: scores}.
|
183 |
+
Returns:
|
184 |
+
The keys of the result dictionary, sorted by segment start time.
|
185 |
+
"""
|
186 |
+
return sorted(results, key=lambda t: float(t.split("-", 1)[0]))
|
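A minimal, self-contained sketch of why the float parse matters: plain string sorting would order the keys lexicographically (timestamps below are illustrative):

results = {"2.0-3.0": [], "10.0-11.0": [], "0.75-1.75": []}
print(sorted(results))                                           # ['0.75-1.75', '10.0-11.0', '2.0-3.0']
print(sorted(results, key=lambda t: float(t.split("-", 1)[0])))  # ['0.75-1.75', '2.0-3.0', '10.0-11.0']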
187 |
+
|
188 |
+
|
189 |
+
def get_raw_audio_from_file(fpath: str):
|
190 |
+
"""Reads an audio file.
|
191 |
+
Reads the file and splits the signal into chunks.
|
192 |
+
Args:
|
193 |
+
fpath: Path to the audio file.
|
194 |
+
Returns:
|
195 |
+
The signal split into a list of chunks.
|
196 |
+
"""
|
197 |
+
# Open file
|
198 |
+
sig, rate = audio.openAudioFile(fpath, cfg.SAMPLE_RATE)
|
199 |
+
|
200 |
+
# Split into raw audio chunks
|
201 |
+
chunks = audio.splitSignal(sig, rate, cfg.SIG_LENGTH, cfg.SIG_OVERLAP, cfg.SIG_MINLEN)
|
202 |
+
|
203 |
+
return chunks
|
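Rough chunk arithmetic, assuming the bat defaults from config.py (SIG_LENGTH = 1.0 s, SIG_OVERLAP = 0.25 s); the exact count also depends on how audio.splitSignal pads the trailing remainder against SIG_MINLEN:

duration = 10.0                      # seconds of audio, illustrative
sig_length, sig_overlap = 1.0, 0.25  # assumed config values
hop = sig_length - sig_overlap       # 0.75 s between consecutive chunk starts
print(int(duration // hop))          # roughly 13 chunks for a 10 s recording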
204 |
+
|
205 |
+
|
206 |
+
def predict(samples):
|
207 |
+
"""Predicts the classes for the given samples.
|
208 |
+
|
209 |
+
Args:
|
210 |
+
samples: Samples to be predicted.
|
211 |
+
|
212 |
+
Returns:
|
213 |
+
The prediction scores.
|
214 |
+
"""
|
215 |
+
# Prepare sample and pass through model
|
216 |
+
data = np.array(samples, dtype="float32")
|
217 |
+
prediction = model.predict(data)
|
218 |
+
|
219 |
+
# Logits or sigmoid activations?
|
220 |
+
if cfg.APPLY_SIGMOID:
|
221 |
+
prediction = model.flat_sigmoid(np.array(prediction), sensitivity=-cfg.SIGMOID_SENSITIVITY)
|
222 |
+
|
223 |
+
return prediction
|
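A shape-level sketch of what predict() hands to the model, assuming the bat defaults (SAMPLE_RATE = 144000, SIG_LENGTH = 1.0, i.e. 144000 samples per chunk):

import numpy as np
samples = [np.zeros(144000, dtype="float32") for _ in range(4)]  # a batch of four silent chunks
data = np.array(samples, dtype="float32")
print(data.shape)  # (4, 144000): batch size x model input size
# model.predict(data) then returns one score vector per chunk,
# with one entry per label in cfg.LABELS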
224 |
+
|
225 |
+
|
226 |
+
def analyze_file(item):
|
227 |
+
"""Analyzes a file.
|
228 |
+
|
229 |
+
Predicts the scores for the file and saves the results.
|
230 |
+
|
231 |
+
Args:
|
232 |
+
item: Tuple containing (file path, config)
|
233 |
+
|
234 |
+
Returns:
|
235 |
+
`True` if the file was analyzed successfully.
|
236 |
+
"""
|
237 |
+
# Get file path and restore cfg
|
238 |
+
fpath: str = item[0]
|
239 |
+
cfg.set_config(item[1])
|
240 |
+
|
241 |
+
# Start time
|
242 |
+
start_time = datetime.datetime.now()
|
243 |
+
|
244 |
+
# Status
|
245 |
+
print(f"Analyzing {fpath}", flush=True)
|
246 |
+
|
247 |
+
try:
|
248 |
+
# Open audio file and split into 3-second chunks
|
249 |
+
chunks = get_raw_audio_from_file(fpath)
|
250 |
+
|
251 |
+
# If no chunks, show error and skip
|
252 |
+
except Exception as ex:
|
253 |
+
print(f"Error: Cannot open audio file {fpath}", flush=True)
|
254 |
+
utils.writeErrorLog(ex)
|
255 |
+
|
256 |
+
return False
|
257 |
+
|
258 |
+
# Process each chunk
|
259 |
+
try:
|
260 |
+
start, end = 0, cfg.SIG_LENGTH
|
261 |
+
results = {}
|
262 |
+
samples = []
|
263 |
+
timestamps = []
|
264 |
+
|
265 |
+
for chunk_index, chunk in enumerate(chunks):
|
266 |
+
# Add to batch
|
267 |
+
samples.append(chunk)
|
268 |
+
timestamps.append([start, end])
|
269 |
+
|
270 |
+
# Advance start and end
|
271 |
+
start += cfg.SIG_LENGTH - cfg.SIG_OVERLAP
|
272 |
+
end = start + cfg.SIG_LENGTH
|
273 |
+
|
274 |
+
# Check if batch is full or last chunk
|
275 |
+
if len(samples) < cfg.BATCH_SIZE and chunk_index < len(chunks) - 1:
|
276 |
+
continue
|
277 |
+
|
278 |
+
# Predict
|
279 |
+
prediction = predict(samples)
|
280 |
+
|
281 |
+
# Add to results
|
282 |
+
for i in range(len(samples)):
|
283 |
+
# Get timestamp
|
284 |
+
s_start, s_end = timestamps[i]
|
285 |
+
|
286 |
+
# Get prediction
|
287 |
+
pred = prediction[i]
|
288 |
+
|
289 |
+
# Assign scores to labels
|
290 |
+
p_labels = zip(cfg.LABELS, pred)
|
291 |
+
|
292 |
+
# Sort by score
|
293 |
+
p_sorted = sorted(p_labels, key=operator.itemgetter(1), reverse=True)
|
294 |
+
|
295 |
+
# Store top 5 results and advance indices
|
296 |
+
results[str(s_start) + "-" + str(s_end)] = p_sorted
|
297 |
+
|
298 |
+
# Clear batch
|
299 |
+
samples = []
|
300 |
+
timestamps = []
|
301 |
+
|
302 |
+
except Exception as ex:
|
303 |
+
# Write error log
|
304 |
+
print(f"Error: Cannot analyze audio file {fpath}.\n", flush=True)
|
305 |
+
utils.writeErrorLog(ex)
|
306 |
+
return False
|
307 |
+
|
308 |
+
# Save as selection table
|
309 |
+
try:
|
310 |
+
# We have to check if output path is a file or directory
|
311 |
+
if cfg.OUTPUT_PATH.rsplit(".", 1)[-1].lower() not in ["txt", "csv"]:
|
312 |
+
rpath = fpath.replace(cfg.INPUT_PATH, "")
|
313 |
+
rpath = rpath[1:] if rpath[0] in ["/", "\\"] else rpath
|
314 |
+
|
315 |
+
# Make target directory if it doesn't exist
|
316 |
+
rdir = os.path.join(cfg.OUTPUT_PATH, os.path.dirname(rpath))
|
317 |
+
|
318 |
+
os.makedirs(rdir, exist_ok=True)
|
319 |
+
|
320 |
+
if cfg.RESULT_TYPE == "table":
|
321 |
+
rtype = "bat.selection.table.txt"
|
322 |
+
elif cfg.RESULT_TYPE == "audacity":
|
323 |
+
rtype = ".bat.results.txt"
|
324 |
+
else:
|
325 |
+
rtype = ".bat.results.csv"
|
326 |
+
|
327 |
+
out_string = save_result_file(results, os.path.join(cfg.OUTPUT_PATH, rpath.rsplit(".", 1)[0] + rtype), fpath)
|
328 |
+
else:
|
329 |
+
out_string = save_result_file(results, cfg.OUTPUT_PATH, fpath)
|
330 |
+
# Save as file
|
331 |
+
with open(cfg.OUTPUT_PATH + "Results.csv", "a", encoding="utf-8") as rfile:
|
332 |
+
postString = out_string.split("\n", 1)[1]
|
333 |
+
# rfile.write(fpath.join(postString.splitlines(True)))
|
334 |
+
rfile.write("\n"+fpath+"\n")
|
335 |
+
rfile.write(postString)
|
336 |
+
|
337 |
+
except Exception as ex:
|
338 |
+
# Write error log
|
339 |
+
print(f"Error: Cannot save result for {fpath}.\n", flush=True)
|
340 |
+
utils.writeErrorLog(ex)
|
341 |
+
return False
|
342 |
+
|
343 |
+
delta_time = (datetime.datetime.now() - start_time).total_seconds()
|
344 |
+
print("Finished {} in {:.2f} seconds".format(fpath, delta_time), flush=True)
|
345 |
+
return True
|
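analyze_file() takes a (path, config-dict) tuple so that each worker process can restore the module-level config on platforms without fork(); a single-file usage sketch (the file name is hypothetical):

entry = ("put-your-files-here/recording.wav", cfg.get_config())
ok = analyze_file(entry)  # True on success, False if opening, analyzing or saving failed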
346 |
+
|
347 |
+
def set_analysis_location():
|
348 |
+
if args.area not in ["Bavaria", "Sweden", "EU", "Scotland", "UK", "USA", "MarinCounty"]:
|
349 |
+
sys.exit("Unknown location option.")
|
350 |
+
else:
|
351 |
+
args.lat = -1
|
352 |
+
args.lon = -1
|
353 |
+
# args.locale = "en"
|
354 |
+
|
355 |
+
if args.area == "Bavaria":
|
356 |
+
cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Bavaria-144kHz.tflite"
|
357 |
+
cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Bavaria-144kHz_Labels.txt"
|
358 |
+
cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
|
359 |
+
args.locale = "de"
|
360 |
+
|
361 |
+
elif args.area == "EU":
|
362 |
+
cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-EU-144kHz.tflite"
|
363 |
+
cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-EU-144kHz_Labels.txt"
|
364 |
+
cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
|
365 |
+
|
366 |
+
elif args.area == "Sweden":
|
367 |
+
cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Sweden-144kHz.tflite"
|
368 |
+
cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Sweden-144kHz_Labels.txt"
|
369 |
+
cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
|
370 |
+
args.locale = "se"
|
371 |
+
|
372 |
+
elif args.area == "Scotland":
|
373 |
+
cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Scotland-144kHz.tflite"
|
374 |
+
cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-Scotland-144kHz_Labels.txt"
|
375 |
+
cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
|
376 |
+
|
377 |
+
elif args.area == "UK":
|
378 |
+
cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-UK-144kHz.tflite"
|
379 |
+
cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-UK-144kHz_Labels.txt"
|
380 |
+
cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
|
381 |
+
|
382 |
+
elif args.area == "USA":
|
383 |
+
cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-USA-144kHz.tflite"
|
384 |
+
cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-USA-144kHz_Labels.txt"
|
385 |
+
cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
|
386 |
+
|
387 |
+
elif args.area == "MarinCounty":
|
388 |
+
cfg.CUSTOM_CLASSIFIER = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-MarinCounty-144kHz.tflite"
|
389 |
+
cfg.LABELS_FILE = cfg.BAT_CLASSIFIER_LOCATION + "/BattyBirdNET-MarinCounty-144kHz_Labels.txt"
|
390 |
+
cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
|
391 |
+
|
392 |
+
else:
|
393 |
+
cfg.CUSTOM_CLASSIFIER = None
|
394 |
+
|
395 |
+
def set_paths():
|
396 |
+
# Set paths relative to script path (requested in #3)
|
397 |
+
script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
|
398 |
+
cfg.MODEL_PATH = os.path.join(script_dir, cfg.MODEL_PATH)
|
399 |
+
cfg.LABELS_FILE = os.path.join(script_dir, cfg.LABELS_FILE)
|
400 |
+
cfg.TRANSLATED_LABELS_PATH = os.path.join(script_dir, cfg.TRANSLATED_LABELS_PATH)
|
401 |
+
cfg.MDATA_MODEL_PATH = os.path.join(script_dir, cfg.MDATA_MODEL_PATH)
|
402 |
+
cfg.CODES_FILE = os.path.join(script_dir, cfg.CODES_FILE)
|
403 |
+
cfg.ERROR_LOG_FILE = os.path.join(script_dir, cfg.ERROR_LOG_FILE)
|
404 |
+
cfg.BAT_CLASSIFIER_LOCATION = os.path.join(script_dir, cfg.BAT_CLASSIFIER_LOCATION)
|
405 |
+
cfg.INPUT_PATH = args.i
|
406 |
+
cfg.OUTPUT_PATH = args.o
|
407 |
+
|
408 |
+
def set_custom_classifier():
|
409 |
+
if args.classifier is None:
|
410 |
+
return
|
411 |
+
cfg.CUSTOM_CLASSIFIER = args.classifier # we treat this as absolute path, so no need to join with dirname
|
412 |
+
cfg.LABELS_FILE = args.classifier.replace(".tflite", "_Labels.txt") # same for labels file
|
413 |
+
cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
|
414 |
+
args.lat = -1
|
415 |
+
args.lon = -1
|
416 |
+
# args.locale = "en"
|
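The label file is derived from the classifier path by a fixed naming convention, so a custom model has to ship both files side by side (paths below are hypothetical):

# --classifier /models/MyBats-144kHz.tflite
# -> labels read from /models/MyBats-144kHz_Labels.txt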
417 |
+
|
418 |
+
def add_parser_arguments():
|
419 |
+
parser.add_argument("--area",
|
420 |
+
default="EU",
|
421 |
+
help="Location. Values in ['Bavaria', 'EU', 'Sweden','Scotland', 'UK', 'USA', 'MarinCounty']. "
|
422 |
+
"Defaults to Bavaria.")
|
423 |
+
|
424 |
+
parser.add_argument("--sensitivity",
|
425 |
+
type=float,
|
426 |
+
default=1.0,
|
427 |
+
help="Detection sensitivity; Higher values result in higher sensitivity. "
|
428 |
+
"Values in [0.5, 1.5]. Defaults to 1.0."
|
429 |
+
)
|
430 |
+
parser.add_argument("--min_conf",
|
431 |
+
type=float,
|
432 |
+
default=0.7,
|
433 |
+
help="Minimum confidence threshold. Values in [0.01, 0.99]. Defaults to 0.1.")
|
434 |
+
|
435 |
+
parser.add_argument("--overlap",
|
436 |
+
type=float,
|
437 |
+
default=0.0,
|
438 |
+
help="Overlap of prediction segments. Values in [0.0, 2.9]. Defaults to 0.0."
|
439 |
+
)
|
440 |
+
parser.add_argument("--rtype",
|
441 |
+
default="csv",
|
442 |
+
help="Specifies output format. Values in ['table', 'audacity', 'r', 'kaleidoscope', 'csv']. "
|
443 |
+
"Defaults to 'csv' (Raven selection table)."
|
444 |
+
)
|
445 |
+
parser.add_argument("--threads",
|
446 |
+
type=int,
|
447 |
+
default=4,
|
448 |
+
help="Number of CPU threads.")
|
449 |
+
parser.add_argument("--batchsize",
|
450 |
+
type=int,
|
451 |
+
default=1,
|
452 |
+
help="Number of samples to process at the same time. Defaults to 1."
|
453 |
+
)
|
454 |
+
parser.add_argument("--sf_thresh",
|
455 |
+
type=float,
|
456 |
+
default=0.03,
|
457 |
+
help="Minimum species occurrence frequency threshold for location filter. "
|
458 |
+
"Values in [0.01, 0.99]. Defaults to 0.03."
|
459 |
+
)
|
460 |
+
parser.add_argument("--segment",
|
461 |
+
default="off",
|
462 |
+
help="Generate audio files containing the detected segments. "
|
463 |
+
)
|
464 |
+
parser.add_argument("--spectrum",
|
465 |
+
default="off",
|
466 |
+
help="Generate mel spectrograms files containing the detected segments. "
|
467 |
+
)
|
468 |
+
parser.add_argument("--i",
|
469 |
+
default=cfg.INPUT_PATH_SAMPLES, # "put-your-files-here/",
|
470 |
+
help="Path to input file or folder. If this is a file, --o needs to be a file too.")
|
471 |
+
parser.add_argument("--o",
|
472 |
+
default=cfg.OUTPUT_PATH_SAMPLES,
|
473 |
+
help="Path to output file or folder. If this is a file, --i needs to be a file too.")
|
474 |
+
|
475 |
+
parser.add_argument("--classifier",
|
476 |
+
default=None,
|
477 |
+
help="Path to custom trained classifier. Defaults to None. "
|
478 |
+
"If set, --lat, --lon and --locale are ignored."
|
479 |
+
)
|
480 |
+
parser.add_argument("--slist",
|
481 |
+
default="",
|
482 |
+
help='Path to species list file or folder. If folder is provided, species list needs to be '
|
483 |
+
'named "species_list.txt". If lat and lon are provided, this list will be ignored.'
|
484 |
+
)
|
485 |
+
parser.add_argument("--lat",
|
486 |
+
type=float,
|
487 |
+
default=-1,
|
488 |
+
help="DISABLED. Set -1 to ignore.")
|
489 |
+
parser.add_argument("--lon",
|
490 |
+
type=float,
|
491 |
+
default=-1,
|
492 |
+
help="DISABLED. Set -1 to ignore.")
|
493 |
+
parser.add_argument("--week",
|
494 |
+
type=int,
|
495 |
+
default=-1,
|
496 |
+
help="DISABLED. Set -1 for year-round species list."
|
497 |
+
)
|
498 |
+
parser.add_argument("--locale",
|
499 |
+
default="en",
|
500 |
+
help="DISABLED. Defaults to 'en'."
|
501 |
+
)
|
502 |
+
|
503 |
+
def load_ebird_codes():
|
504 |
+
cfg.CODES = load_codes()
|
505 |
+
cfg.LABELS = utils.readLines(cfg.LABELS_FILE)
|
506 |
+
|
507 |
+
def load_species_list():
|
508 |
+
cfg.LATITUDE, cfg.LONGITUDE, cfg.WEEK = args.lat, args.lon, args.week
|
509 |
+
cfg.LOCATION_FILTER_THRESHOLD = max(0.01, min(0.99, float(args.sf_thresh)))
|
510 |
+
script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
|
511 |
+
|
512 |
+
if cfg.LATITUDE == -1 and cfg.LONGITUDE == -1:
|
513 |
+
if not args.slist:
|
514 |
+
cfg.SPECIES_LIST_FILE = None
|
515 |
+
else:
|
516 |
+
cfg.SPECIES_LIST_FILE = os.path.join(script_dir, args.slist)
|
517 |
+
|
518 |
+
if os.path.isdir(cfg.SPECIES_LIST_FILE):
|
519 |
+
cfg.SPECIES_LIST_FILE = os.path.join(cfg.SPECIES_LIST_FILE, "species_list.txt")
|
520 |
+
|
521 |
+
cfg.SPECIES_LIST = utils.readLines(cfg.SPECIES_LIST_FILE)
|
522 |
+
else:
|
523 |
+
cfg.SPECIES_LIST_FILE = None
|
524 |
+
cfg.SPECIES_LIST = species.getSpeciesList(cfg.LATITUDE, cfg.LONGITUDE, cfg.WEEK, cfg.LOCATION_FILTER_THRESHOLD)
|
525 |
+
if not cfg.SPECIES_LIST:
|
526 |
+
print(f"Species list contains {len(cfg.LABELS)} species")
|
527 |
+
else:
|
528 |
+
print(f"Species list contains {len(cfg.SPECIES_LIST)} species")
|
529 |
+
|
530 |
+
def parse_input_files():
|
531 |
+
if os.path.isdir(cfg.INPUT_PATH):
|
532 |
+
cfg.FILE_LIST = utils.collect_audio_files(cfg.INPUT_PATH)
|
533 |
+
print(f"Found {len(cfg.FILE_LIST)} files to analyze")
|
534 |
+
else:
|
535 |
+
cfg.FILE_LIST = [cfg.INPUT_PATH]
|
536 |
+
|
537 |
+
def set_analysis_parameters():
|
538 |
+
cfg.MIN_CONFIDENCE = max(0.01, min(0.99, float(args.min_conf)))
|
539 |
+
cfg.SIGMOID_SENSITIVITY = max(0.5, min(1.0 - (float(args.sensitivity) - 1.0), 1.5))
|
540 |
+
cfg.SIG_OVERLAP = max(0.0, min(2.9, float(args.overlap)))
|
541 |
+
cfg.BATCH_SIZE = max(1, int(args.batchsize))
|
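How the --sensitivity argument maps onto cfg.SIGMOID_SENSITIVITY under the clamping above (the value is later negated when passed to model.flat_sigmoid):

for s in (0.5, 1.0, 1.5):  # the documented CLI range
    print(s, max(0.5, min(1.0 - (s - 1.0), 1.5)))
# 0.5 -> 1.5 (steeper sigmoid), 1.0 -> 1.0 (default), 1.5 -> 0.5 (flatter sigmoid)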
542 |
+
|
543 |
+
def set_hardware_parameters():
|
544 |
+
if os.path.isdir(cfg.INPUT_PATH):
|
545 |
+
cfg.CPU_THREADS = max(1, int(args.threads))
|
546 |
+
cfg.TFLITE_THREADS = 1
|
547 |
+
else:
|
548 |
+
cfg.CPU_THREADS = 1
|
549 |
+
cfg.TFLITE_THREADS = max(1, int(args.threads))
|
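In short: folder input fans out across worker processes with one TFLite thread each, while a single file keeps one process and gives the interpreter all requested threads. For example, with --threads 4:

# folder input -> cfg.CPU_THREADS = 4, cfg.TFLITE_THREADS = 1  (four worker processes)
# file input   -> cfg.CPU_THREADS = 1, cfg.TFLITE_THREADS = 4  (one process, four TFLite threads)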
550 |
+
|
551 |
+
def load_translated_labels():
|
552 |
+
cfg.TRANSLATED_LABELS_PATH = cfg.TRANSLATED_BAT_LABELS_PATH
|
553 |
+
lfile = os.path.join(cfg.TRANSLATED_LABELS_PATH,
|
554 |
+
os.path.basename(cfg.LABELS_FILE).replace(".txt", "_{}.txt".format(args.locale))
|
555 |
+
)
|
556 |
+
if args.locale not in ["en"] and os.path.isfile(lfile):
|
557 |
+
cfg.TRANSLATED_LABELS = utils.readLines(lfile)
|
558 |
+
else:
|
559 |
+
cfg.TRANSLATED_LABELS = cfg.LABELS
|
560 |
+
|
561 |
+
def check_result_type():
|
562 |
+
cfg.RESULT_TYPE = args.rtype.lower()
|
563 |
+
if cfg.RESULT_TYPE not in ["table", "audacity", "r", "kaleidoscope", "csv"]:
|
564 |
+
cfg.RESULT_TYPE = "csv"
|
565 |
+
print("Unknown output option. Using csv output.")
|
566 |
+
|
567 |
+
if __name__ == "__main__":
|
568 |
+
freeze_support() # Freeze support for executable
|
569 |
+
parser = argparse.ArgumentParser(description="Analyze audio files with BattyBirdNET")
|
570 |
+
add_parser_arguments()
|
571 |
+
args = parser.parse_args()
|
572 |
+
set_paths()
|
573 |
+
load_ebird_codes()
|
574 |
+
set_custom_classifier()
|
575 |
+
check_result_type()
|
576 |
+
set_analysis_location()
|
577 |
+
load_translated_labels()
|
578 |
+
load_species_list()
|
579 |
+
parse_input_files()
|
580 |
+
set_analysis_parameters()
|
581 |
+
set_hardware_parameters()
|
582 |
+
# Add config items to each file list entry.
|
583 |
+
# We have to do this for Windows which does not
|
584 |
+
# support fork() and thus each process has to
|
585 |
+
# have its own config. USE LINUX!
|
586 |
+
flist = [(f, cfg.get_config()) for f in cfg.FILE_LIST]
|
587 |
+
|
588 |
+
# Analyze files
|
589 |
+
if cfg.CPU_THREADS < 2:
|
590 |
+
for entry in flist:
|
591 |
+
analyze_file(entry)
|
592 |
+
else:
|
593 |
+
with Pool(cfg.CPU_THREADS) as p:
|
594 |
+
p.map(analyze_file, flist)
|
595 |
+
|
596 |
+
if args.segment == "on" or args.spectrum == "on":
|
597 |
+
subprocess.run(["python3", "segments.py"])
|
598 |
+
|
599 |
+
if args.spectrum == "on":
|
600 |
+
# Iterate through the subfolders of the segments folder and call the plotter
|
601 |
+
print("Spectrums in progress ...")
|
602 |
+
script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
|
603 |
+
root_dir = pathlib.Path(os.path.join(script_dir, args.i + "/segments"))
|
604 |
+
for dir_name in os.listdir(root_dir):
|
605 |
+
f = os.path.join(root_dir, dir_name)
|
606 |
+
if not os.path.isfile(f):
|
607 |
+
print("Spectrum in progres for: " + f)
|
608 |
+
cmd = ['python3', "batchspec.py", f, f]
|
609 |
+
subprocess.run(cmd)
|
610 |
+
# A few examples to test
|
611 |
+
# python3 bat_ident.py --i example/ --o example/ --slist example/ --min_conf 0.5 --threads 4
|
612 |
+
# python3 bat_ident.py --i example/soundscape.wav --o example/soundscape.BirdNET.selection.table.txt --slist example/species_list.txt --threads 8
|
613 |
+
# python3 bat_ident.py --i example/ --o example/ --lat 42.5 --lon -76.45 --week 4 --sensitivity 1.0 --rtype table --locale de
|
614 |
+
|
615 |
+
|
616 |
+
|
config.py
ADDED
@@ -0,0 +1,257 @@
1 |
+
#################
|
2 |
+
# Misc settings #
|
3 |
+
#################
|
4 |
+
|
5 |
+
# Random seed for gaussian noise
|
6 |
+
RANDOM_SEED = 42
|
7 |
+
|
8 |
+
##########################
|
9 |
+
# Model paths and config #
|
10 |
+
##########################
|
11 |
+
# These BirdNET models are also required for bat detection: we use their embeddings
|
12 |
+
# as input to the classifiers that identify the bats.
|
13 |
+
# MODEL_PATH = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model' # This will load the protobuf model
|
14 |
+
MODEL_PATH = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Model_FP32.tflite'
|
15 |
+
MDATA_MODEL_PATH = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_MData_Model_FP16.tflite'
|
16 |
+
LABELS_FILE = 'checkpoints/V2.4/BirdNET_GLOBAL_6K_V2.4_Labels.txt'
|
17 |
+
TRANSLATED_LABELS_PATH = 'labels/V2.4'
|
18 |
+
TRANSLATED_BAT_LABELS_PATH = 'labels/bats/'
|
19 |
+
|
20 |
+
# Path to custom trained classifier
|
21 |
+
# If None, no custom classifier will be used
|
22 |
+
# Make sure to set the LABELS_FILE above accordingly
|
23 |
+
CUSTOM_CLASSIFIER = None
|
24 |
+
|
25 |
+
##################
|
26 |
+
# Audio settings #
|
27 |
+
##################
|
28 |
+
|
29 |
+
# BirdNET uses a sample rate of 48kHz, so the model input size is
|
30 |
+
# (batch size, 48000 kHz * 3 seconds) = (1, 144000)
|
31 |
+
# Recordings will be resampled automatically.
|
32 |
+
# For bats we use: 144000 for 1 sec.
|
33 |
+
# Note that only combinations with SIG_LENGTH * SAMPLE_RATE = 144000 will work;
|
34 |
+
# possible sample rates are e.g. 144000, 240000, 360000; check your classifier's frequency!
|
35 |
+
SAMPLE_RATE: int = 144000
|
36 |
+
|
37 |
+
# We're using 1-second chunks
|
38 |
+
SIG_LENGTH: float = 144000 / SAMPLE_RATE
|
39 |
+
|
40 |
+
# Define overlap between consecutive chunks < SIG_LENGTH; 0 = no overlap
|
41 |
+
SIG_OVERLAP: float = SIG_LENGTH / 4.0
|
42 |
+
|
43 |
+
# Define minimum length of audio chunk for prediction,
|
44 |
+
# chunks shorter than SIG_LENGTH seconds will be padded with zeros
|
45 |
+
SIG_MINLEN: float = SIG_LENGTH / 3.0
|
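The derived values above all follow from SAMPLE_RATE, since the network always consumes 144000-sample windows; for instance:

# SAMPLE_RATE = 144000 -> SIG_LENGTH = 1.0 s, SIG_OVERLAP = 0.25 s, SIG_MINLEN ~ 0.33 s  (defaults here)
# SAMPLE_RATE = 240000 -> SIG_LENGTH = 0.6 s, SIG_OVERLAP = 0.15 s, SIG_MINLEN = 0.2 s
# SAMPLE_RATE = 360000 -> SIG_LENGTH = 0.4 s, SIG_OVERLAP = 0.1 s,  SIG_MINLEN ~ 0.13 s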
46 |
+
|
47 |
+
#####################
|
48 |
+
# Metadata settings #
|
49 |
+
#####################
|
50 |
+
# These settings are currently not in use for bat detection
|
51 |
+
LATITUDE = -1
|
52 |
+
LONGITUDE = -1
|
53 |
+
WEEK = -1
|
54 |
+
LOCATION_FILTER_THRESHOLD = 0.03
|
55 |
+
|
56 |
+
######################
|
57 |
+
# Inference settings #
|
58 |
+
######################
|
59 |
+
|
60 |
+
# If None or empty file, no custom species list will be used
|
61 |
+
# Note: Entries in this list have to match entries from the LABELS_FILE
|
62 |
+
# We use the 2021 eBird taxonomy for species names (Clements list)
|
63 |
+
CODES_FILE = 'eBird_taxonomy_codes_2021E.json'
|
64 |
+
SPECIES_LIST_FILE = 'example/species_list.txt'
|
65 |
+
|
66 |
+
# File input path and output path for selection tables
|
67 |
+
INPUT_PATH: str = 'example/'
|
68 |
+
OUTPUT_PATH: str = 'example/'
|
69 |
+
|
70 |
+
# Used for bats - files placed here are analyzed with the default settings
|
71 |
+
INPUT_PATH_SAMPLES: str = 'put-your-files-here/'
|
72 |
+
OUTPUT_PATH_SAMPLES: str = 'put-your-files-here/results/'
|
73 |
+
BAT_CLASSIFIER_LOCATION: str = 'checkpoints/bats/v1.0'
|
74 |
+
|
75 |
+
ALLOWED_FILETYPES = ['wav', 'flac', 'mp3', 'ogg', 'm4a']
|
76 |
+
|
77 |
+
# Number of threads to use for inference.
|
78 |
+
# Can be as high as number of CPUs in your system
|
79 |
+
CPU_THREADS: int = 8
|
80 |
+
TFLITE_THREADS: int = 6
|
81 |
+
|
82 |
+
# False will output logits, True will convert to sigmoid activations
|
83 |
+
APPLY_SIGMOID: bool = True
|
84 |
+
SIGMOID_SENSITIVITY: float = 1.0
|
85 |
+
|
86 |
+
# Minimum confidence score to include in selection table
|
87 |
+
# (be aware: if APPLY_SIGMOID = False, this no longer represents
|
88 |
+
# probabilities and needs to be adjusted)
|
89 |
+
MIN_CONFIDENCE: float = 0.6
|
90 |
+
|
91 |
+
# Number of samples to process at the same time. Higher values can increase
|
92 |
+
# processing speed, but will also increase memory usage.
|
93 |
+
# Might only be useful for GPU inference.
|
94 |
+
BATCH_SIZE: int = 1
|
95 |
+
|
96 |
+
# Specifies the output format. 'table' denotes a Raven selection table,
|
97 |
+
# 'audacity' denotes a TXT file with the same format as Audacity timeline labels
|
98 |
+
# 'csv' denotes a CSV file with start, end, species and confidence.
|
99 |
+
RESULT_TYPE = 'csv'
|
100 |
+
|
101 |
+
#####################
|
102 |
+
# Training settings #
|
103 |
+
#####################
|
104 |
+
|
105 |
+
# Training data path
|
106 |
+
TRAIN_DATA_PATH = 'train_data/'
|
107 |
+
|
108 |
+
# Number of epochs to train for
|
109 |
+
TRAIN_EPOCHS: int = 100
|
110 |
+
|
111 |
+
# Batch size for training
|
112 |
+
TRAIN_BATCH_SIZE: int = 32
|
113 |
+
|
114 |
+
# Learning rate for training
|
115 |
+
TRAIN_LEARNING_RATE: float = 0.01
|
116 |
+
|
117 |
+
# Number of hidden units in custom classifier
|
118 |
+
# If >0, a two-layer classifier will be trained
|
119 |
+
TRAIN_HIDDEN_UNITS: int = 0
|
120 |
+
|
121 |
+
#####################
|
122 |
+
# Misc runtime vars #
|
123 |
+
#####################
|
124 |
+
CODES = {}
|
125 |
+
LABELS: list[str] = []
|
126 |
+
TRANSLATED_LABELS: list[str] = []
|
127 |
+
SPECIES_LIST: list[str] = []
|
128 |
+
ERROR_LOG_FILE: str = 'error_log.txt'
|
129 |
+
FILE_LIST = []
|
130 |
+
FILE_STORAGE_PATH = ''
|
131 |
+
|
132 |
+
|
133 |
+
######################
|
134 |
+
# Get and set config #
|
135 |
+
######################
|
136 |
+
|
137 |
+
def get_config():
|
138 |
+
return {
|
139 |
+
'RANDOM_SEED': RANDOM_SEED,
|
140 |
+
'MODEL_PATH': MODEL_PATH,
|
141 |
+
'MDATA_MODEL_PATH': MDATA_MODEL_PATH,
|
142 |
+
'LABELS_FILE': LABELS_FILE,
|
143 |
+
'CUSTOM_CLASSIFIER': CUSTOM_CLASSIFIER,
|
144 |
+
'SAMPLE_RATE': SAMPLE_RATE,
|
145 |
+
'SIG_LENGTH': SIG_LENGTH,
|
146 |
+
'SIG_OVERLAP': SIG_OVERLAP,
|
147 |
+
'SIG_MINLEN': SIG_MINLEN,
|
148 |
+
'LATITUDE': LATITUDE,
|
149 |
+
'LONGITUDE': LONGITUDE,
|
150 |
+
'WEEK': WEEK,
|
151 |
+
'LOCATION_FILTER_THRESHOLD': LOCATION_FILTER_THRESHOLD,
|
152 |
+
'CODES_FILE': CODES_FILE,
|
153 |
+
'SPECIES_LIST_FILE': SPECIES_LIST_FILE,
|
154 |
+
'INPUT_PATH': INPUT_PATH,
|
155 |
+
'OUTPUT_PATH': OUTPUT_PATH,
|
156 |
+
'CPU_THREADS': CPU_THREADS,
|
157 |
+
'TFLITE_THREADS': TFLITE_THREADS,
|
158 |
+
'APPLY_SIGMOID': APPLY_SIGMOID,
|
159 |
+
'SIGMOID_SENSITIVITY': SIGMOID_SENSITIVITY,
|
160 |
+
'MIN_CONFIDENCE': MIN_CONFIDENCE,
|
161 |
+
'BATCH_SIZE': BATCH_SIZE,
|
162 |
+
'RESULT_TYPE': RESULT_TYPE,
|
163 |
+
'TRAIN_DATA_PATH': TRAIN_DATA_PATH,
|
164 |
+
'TRAIN_EPOCHS': TRAIN_EPOCHS,
|
165 |
+
'TRAIN_BATCH_SIZE': TRAIN_BATCH_SIZE,
|
166 |
+
'TRAIN_LEARNING_RATE': TRAIN_LEARNING_RATE,
|
167 |
+
'TRAIN_HIDDEN_UNITS': TRAIN_HIDDEN_UNITS,
|
168 |
+
'CODES': CODES,
|
169 |
+
'LABELS': LABELS,
|
170 |
+
'TRANSLATED_LABELS': TRANSLATED_LABELS,
|
171 |
+
'SPECIES_LIST': SPECIES_LIST,
|
172 |
+
'ERROR_LOG_FILE': ERROR_LOG_FILE,
|
173 |
+
'INPUT_PATH_SAMPLES': INPUT_PATH_SAMPLES,
|
174 |
+
'OUTPUT_PATH_SAMPLES': OUTPUT_PATH_SAMPLES,
|
175 |
+
'BAT_CLASSIFIER_LOCATION': BAT_CLASSIFIER_LOCATION,
|
176 |
+
'TRANSLATED_BAT_LABELS_PATH': TRANSLATED_BAT_LABELS_PATH
|
177 |
+
}
|
178 |
+
|
179 |
+
|
180 |
+
def set_config(c):
|
181 |
+
global RANDOM_SEED
|
182 |
+
global MODEL_PATH
|
183 |
+
global MDATA_MODEL_PATH
|
184 |
+
global LABELS_FILE
|
185 |
+
global CUSTOM_CLASSIFIER
|
186 |
+
global SAMPLE_RATE
|
187 |
+
global SIG_LENGTH
|
188 |
+
global SIG_OVERLAP
|
189 |
+
global SIG_MINLEN
|
190 |
+
global LATITUDE
|
191 |
+
global LONGITUDE
|
192 |
+
global WEEK
|
193 |
+
global LOCATION_FILTER_THRESHOLD
|
194 |
+
global CODES_FILE
|
195 |
+
global SPECIES_LIST_FILE
|
196 |
+
global INPUT_PATH
|
197 |
+
global OUTPUT_PATH
|
198 |
+
global CPU_THREADS
|
199 |
+
global TFLITE_THREADS
|
200 |
+
global APPLY_SIGMOID
|
201 |
+
global SIGMOID_SENSITIVITY
|
202 |
+
global MIN_CONFIDENCE
|
203 |
+
global BATCH_SIZE
|
204 |
+
global RESULT_TYPE
|
205 |
+
global TRAIN_DATA_PATH
|
206 |
+
global TRAIN_EPOCHS
|
207 |
+
global TRAIN_BATCH_SIZE
|
208 |
+
global TRAIN_LEARNING_RATE
|
209 |
+
global TRAIN_HIDDEN_UNITS
|
210 |
+
global CODES
|
211 |
+
global LABELS
|
212 |
+
global TRANSLATED_LABELS
|
213 |
+
global SPECIES_LIST
|
214 |
+
global ERROR_LOG_FILE
|
215 |
+
global INPUT_PATH_SAMPLES
|
216 |
+
global OUTPUT_PATH_SAMPLES
|
217 |
+
global BAT_CLASSIFIER_LOCATION
|
218 |
+
global TRANSLATED_BAT_LABELS_PATH
|
219 |
+
|
220 |
+
RANDOM_SEED = c['RANDOM_SEED']
|
221 |
+
MODEL_PATH = c['MODEL_PATH']
|
222 |
+
MDATA_MODEL_PATH = c['MDATA_MODEL_PATH']
|
223 |
+
LABELS_FILE = c['LABELS_FILE']
|
224 |
+
CUSTOM_CLASSIFIER = c['CUSTOM_CLASSIFIER']
|
225 |
+
SAMPLE_RATE = c['SAMPLE_RATE']
|
226 |
+
SIG_LENGTH = c['SIG_LENGTH']
|
227 |
+
SIG_OVERLAP = c['SIG_OVERLAP']
|
228 |
+
SIG_MINLEN = c['SIG_MINLEN']
|
229 |
+
LATITUDE = c['LATITUDE']
|
230 |
+
LONGITUDE = c['LONGITUDE']
|
231 |
+
WEEK = c['WEEK']
|
232 |
+
LOCATION_FILTER_THRESHOLD = c['LOCATION_FILTER_THRESHOLD']
|
233 |
+
CODES_FILE = c['CODES_FILE']
|
234 |
+
SPECIES_LIST_FILE = c['SPECIES_LIST_FILE']
|
235 |
+
INPUT_PATH = c['INPUT_PATH']
|
236 |
+
OUTPUT_PATH = c['OUTPUT_PATH']
|
237 |
+
CPU_THREADS = c['CPU_THREADS']
|
238 |
+
TFLITE_THREADS = c['TFLITE_THREADS']
|
239 |
+
APPLY_SIGMOID = c['APPLY_SIGMOID']
|
240 |
+
SIGMOID_SENSITIVITY = c['SIGMOID_SENSITIVITY']
|
241 |
+
MIN_CONFIDENCE = c['MIN_CONFIDENCE']
|
242 |
+
BATCH_SIZE = c['BATCH_SIZE']
|
243 |
+
RESULT_TYPE = c['RESULT_TYPE']
|
244 |
+
TRAIN_DATA_PATH = c['TRAIN_DATA_PATH']
|
245 |
+
TRAIN_EPOCHS = c['TRAIN_EPOCHS']
|
246 |
+
TRAIN_BATCH_SIZE = c['TRAIN_BATCH_SIZE']
|
247 |
+
TRAIN_LEARNING_RATE = c['TRAIN_LEARNING_RATE']
|
248 |
+
TRAIN_HIDDEN_UNITS = c['TRAIN_HIDDEN_UNITS']
|
249 |
+
CODES = c['CODES']
|
250 |
+
LABELS = c['LABELS']
|
251 |
+
TRANSLATED_LABELS = c['TRANSLATED_LABELS']
|
252 |
+
SPECIES_LIST = c['SPECIES_LIST']
|
253 |
+
ERROR_LOG_FILE = c['ERROR_LOG_FILE']
|
254 |
+
INPUT_PATH_SAMPLES = c['INPUT_PATH_SAMPLES']
|
255 |
+
OUTPUT_PATH_SAMPLES = c['OUTPUT_PATH_SAMPLES']
|
256 |
+
BAT_CLASSIFIER_LOCATION = c['BAT_CLASSIFIER_LOCATION']
|
257 |
+
TRANSLATED_BAT_LABELS_PATH = c['TRANSLATED_BAT_LABELS_PATH']
|
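get_config() and set_config() exist to round-trip this module's state across process boundaries; a minimal sketch of the intended use:

import config as cfg
snapshot = cfg.get_config()  # a plain dict, picklable for multiprocessing.Pool workers
# ... inside a worker process ...
cfg.set_config(snapshot)     # restores every module-level setting listed above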
model.py
ADDED
@@ -0,0 +1,389 @@
1 |
+
"""Contains functions to use the BirdNET models.
|
2 |
+
"""
|
3 |
+
import os
|
4 |
+
import warnings
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
|
8 |
+
import config as cfg
|
9 |
+
|
10 |
+
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
|
11 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
12 |
+
|
13 |
+
warnings.filterwarnings("ignore")
|
14 |
+
|
15 |
+
# Import TFLite from runtime or Tensorflow;
|
16 |
+
# import Keras if protobuf model;
|
17 |
+
# NOTE: we have to use TFLite if we want to use
|
18 |
+
# the metadata model or want to extract embeddings
|
19 |
+
try:
|
20 |
+
import tflite_runtime.interpreter as tflite
|
21 |
+
except ModuleNotFoundError:
|
22 |
+
from tensorflow import lite as tflite
|
23 |
+
if not cfg.MODEL_PATH.endswith(".tflite"):
|
24 |
+
from tensorflow import keras
|
25 |
+
|
26 |
+
INTERPRETER: tflite.Interpreter = None
|
27 |
+
C_INTERPRETER: tflite.Interpreter = None
|
28 |
+
M_INTERPRETER: tflite.Interpreter = None
|
29 |
+
PBMODEL = None
|
30 |
+
|
31 |
+
|
32 |
+
def loadModel(class_output=True):
|
33 |
+
"""Initializes the BirdNET Model.
|
34 |
+
|
35 |
+
Args:
|
36 |
+
class_output: Omits the last layer when False.
|
37 |
+
"""
|
38 |
+
global PBMODEL
|
39 |
+
global INTERPRETER
|
40 |
+
global INPUT_LAYER_INDEX
|
41 |
+
global OUTPUT_LAYER_INDEX
|
42 |
+
|
43 |
+
# Do we have to load the tflite or protobuf model?
|
44 |
+
if cfg.MODEL_PATH.endswith(".tflite"):
|
45 |
+
# Load TFLite model and allocate tensors.
|
46 |
+
INTERPRETER = tflite.Interpreter(model_path=cfg.MODEL_PATH, num_threads=cfg.TFLITE_THREADS)
|
47 |
+
INTERPRETER.allocate_tensors()
|
48 |
+
|
49 |
+
# Get input and output tensors.
|
50 |
+
input_details = INTERPRETER.get_input_details()
|
51 |
+
output_details = INTERPRETER.get_output_details()
|
52 |
+
|
53 |
+
# Get input tensor index
|
54 |
+
INPUT_LAYER_INDEX = input_details[0]["index"]
|
55 |
+
|
56 |
+
# Get classification output or feature embeddings
|
57 |
+
if class_output:
|
58 |
+
OUTPUT_LAYER_INDEX = output_details[0]["index"]
|
59 |
+
else:
|
60 |
+
OUTPUT_LAYER_INDEX = output_details[0]["index"] - 1
|
61 |
+
|
62 |
+
else:
|
63 |
+
# Load protobuf model
|
64 |
+
# Note: This will throw a bunch of warnings about custom gradients
|
65 |
+
# which we will ignore until TF lets us block them
|
66 |
+
PBMODEL = keras.models.load_model(cfg.MODEL_PATH, compile=False)
|
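The class_output flag relies on TFLite tensor ordering: the tensor one index before the classification output holds the penultimate-layer embeddings. A usage sketch:

loadModel(class_output=False)  # OUTPUT_LAYER_INDEX now points at the embedding tensor
# loadModel(class_output=True) selects the regular BirdNET class scores instead;
# embeddings() further below depends on the class_output=False variant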
67 |
+
|
68 |
+
|
69 |
+
def loadCustomClassifier():
|
70 |
+
"""Loads the custom classifier."""
|
71 |
+
global C_INTERPRETER
|
72 |
+
global C_INPUT_LAYER_INDEX
|
73 |
+
global C_OUTPUT_LAYER_INDEX
|
74 |
+
|
75 |
+
# Load TFLite model and allocate tensors.
|
76 |
+
C_INTERPRETER = tflite.Interpreter(model_path=cfg.CUSTOM_CLASSIFIER, num_threads=cfg.TFLITE_THREADS)
|
77 |
+
C_INTERPRETER.allocate_tensors()
|
78 |
+
|
79 |
+
# Get input and output tensors.
|
80 |
+
input_details = C_INTERPRETER.get_input_details()
|
81 |
+
output_details = C_INTERPRETER.get_output_details()
|
82 |
+
|
83 |
+
# Get input tensor index
|
84 |
+
C_INPUT_LAYER_INDEX = input_details[0]["index"]
|
85 |
+
|
86 |
+
# Get classification output
|
87 |
+
C_OUTPUT_LAYER_INDEX = output_details[0]["index"]
|
88 |
+
|
89 |
+
|
90 |
+
def loadMetaModel():
|
91 |
+
"""Loads the model for species prediction.
|
92 |
+
|
93 |
+
Initializes the model used to predict species list, based on coordinates and week of year.
|
94 |
+
"""
|
95 |
+
global M_INTERPRETER
|
96 |
+
global M_INPUT_LAYER_INDEX
|
97 |
+
global M_OUTPUT_LAYER_INDEX
|
98 |
+
|
99 |
+
# Load TFLite model and allocate tensors.
|
100 |
+
M_INTERPRETER = tflite.Interpreter(model_path=cfg.MDATA_MODEL_PATH, num_threads=cfg.TFLITE_THREADS)
|
101 |
+
M_INTERPRETER.allocate_tensors()
|
102 |
+
|
103 |
+
# Get input and output tensors.
|
104 |
+
input_details = M_INTERPRETER.get_input_details()
|
105 |
+
output_details = M_INTERPRETER.get_output_details()
|
106 |
+
|
107 |
+
# Get input tensor index
|
108 |
+
M_INPUT_LAYER_INDEX = input_details[0]["index"]
|
109 |
+
M_OUTPUT_LAYER_INDEX = output_details[0]["index"]
|
110 |
+
|
111 |
+
|
112 |
+
def buildLinearClassifier(num_labels, input_size, hidden_units=0):
|
113 |
+
"""Builds a classifier.
|
114 |
+
|
115 |
+
Args:
|
116 |
+
num_labels: Output size.
|
117 |
+
input_size: Size of the input.
|
118 |
+
hidden_units: If > 0, creates another hidden layer with the given number of units.
|
119 |
+
|
120 |
+
Returns:
|
121 |
+
A new classifier.
|
122 |
+
"""
|
123 |
+
# import keras
|
124 |
+
from tensorflow import keras
|
125 |
+
|
126 |
+
# Build a simple one- or two-layer linear classifier
|
127 |
+
model = keras.Sequential()
|
128 |
+
|
129 |
+
# Input layer
|
130 |
+
model.add(keras.layers.InputLayer(input_shape=(input_size,)))
|
131 |
+
|
132 |
+
# Hidden layer
|
133 |
+
if hidden_units > 0:
|
134 |
+
model.add(keras.layers.Dense(hidden_units, activation="relu"))
|
135 |
+
|
136 |
+
# Classification layer
|
137 |
+
model.add(keras.layers.Dense(num_labels))
|
138 |
+
|
139 |
+
# Activation layer
|
140 |
+
model.add(keras.layers.Activation("sigmoid"))
|
141 |
+
|
142 |
+
return model
|
143 |
+
|
144 |
+
|
145 |
+
def trainLinearClassifier(classifier, x_train, y_train, epochs, batch_size, learning_rate, on_epoch_end=None):
|
146 |
+
"""Trains a custom classifier.
|
147 |
+
|
148 |
+
Trains a new classifier for BirdNET based on the given data.
|
149 |
+
|
150 |
+
Args:
|
151 |
+
classifier: The classifier to be trained.
|
152 |
+
x_train: Samples.
|
153 |
+
y_train: Labels.
|
154 |
+
epochs: Number of epochs to train.
|
155 |
+
batch_size: Batch size.
|
156 |
+
learning_rate: The learning rate during training.
|
157 |
+
on_epoch_end: Optional callback `function(epoch, logs)`.
|
158 |
+
|
159 |
+
Returns:
|
160 |
+
(classifier, history)
|
161 |
+
"""
|
162 |
+
# import keras
|
163 |
+
from tensorflow import keras
|
164 |
+
|
165 |
+
class FunctionCallback(keras.callbacks.Callback):
|
166 |
+
def __init__(self, on_epoch_end=None) -> None:
|
167 |
+
super().__init__()
|
168 |
+
self.on_epoch_end_fn = on_epoch_end
|
169 |
+
|
170 |
+
def on_epoch_end(self, epoch, logs=None):
|
171 |
+
if self.on_epoch_end_fn:
|
172 |
+
self.on_epoch_end_fn(epoch, logs)
|
173 |
+
|
174 |
+
# Set random seed
|
175 |
+
np.random.seed(cfg.RANDOM_SEED)
|
176 |
+
|
177 |
+
# Shuffle data
|
178 |
+
idx = np.arange(x_train.shape[0])
|
179 |
+
np.random.shuffle(idx)
|
180 |
+
x_train = x_train[idx]
|
181 |
+
y_train = y_train[idx]
|
182 |
+
|
183 |
+
# Random val split
|
184 |
+
x_val = x_train[int(0.8 * x_train.shape[0]) :]
|
185 |
+
y_val = y_train[int(0.8 * y_train.shape[0]) :]
|
186 |
+
|
187 |
+
# Early stopping
|
188 |
+
callbacks = [
|
189 |
+
keras.callbacks.EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True),
|
190 |
+
FunctionCallback(on_epoch_end=on_epoch_end),
|
191 |
+
]
|
192 |
+
|
193 |
+
# Cosine annealing lr schedule
|
194 |
+
lr_schedule = keras.experimental.CosineDecay(learning_rate, epochs * x_train.shape[0] / batch_size)
|
195 |
+
|
196 |
+
# Compile model
|
197 |
+
classifier.compile(
|
198 |
+
optimizer=keras.optimizers.Adam(learning_rate=lr_schedule),
|
199 |
+
loss="binary_crossentropy",
|
200 |
+
metrics=keras.metrics.Precision(top_k=1, name="prec"),
|
201 |
+
)
|
202 |
+
|
203 |
+
# Train model
|
204 |
+
history = classifier.fit(
|
205 |
+
x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_val, y_val), callbacks=callbacks
|
206 |
+
)
|
207 |
+
|
208 |
+
return classifier, history
|
209 |
+
|
210 |
+
|
211 |
+
def saveLinearClassifier(classifier, model_path, labels):
|
212 |
+
"""Saves a custom classifier on the hard drive.
|
213 |
+
|
214 |
+
Saves the classifier as a tflite model, as well as the used labels in a .txt.
|
215 |
+
|
216 |
+
Args:
|
217 |
+
classifier: The custom classifier.
|
218 |
+
model_path: Path the model will be saved at.
|
219 |
+
labels: List of labels used for the classifier.
|
220 |
+
"""
|
221 |
+
# Make folders
|
222 |
+
os.makedirs(os.path.dirname(model_path), exist_ok=True)
|
223 |
+
|
224 |
+
# Remove activation layer
|
225 |
+
classifier.pop()
|
226 |
+
|
227 |
+
# Save model as tflite
|
228 |
+
converter = tflite.TFLiteConverter.from_keras_model(classifier)
|
229 |
+
tflite_model = converter.convert()
|
230 |
+
open(model_path, "wb").write(tflite_model)
|
231 |
+
|
232 |
+
# Save labels
|
233 |
+
with open(model_path.replace(".tflite", "_Labels.txt"), "w") as f:
|
234 |
+
for label in labels:
|
235 |
+
f.write(label + "\n")
|
236 |
+
|
237 |
+
|
238 |
+
def predictFilter(lat, lon, week):
|
239 |
+
"""Predicts the probability for each species.
|
240 |
+
|
241 |
+
Args:
|
242 |
+
lat: The latitude.
|
243 |
+
lon: The longitude.
|
244 |
+
week: The week of the year [1-48]. Use -1 for yearlong.
|
245 |
+
|
246 |
+
Returns:
|
247 |
+
A list of probabilities for all species.
|
248 |
+
"""
|
249 |
+
global M_INTERPRETER
|
250 |
+
|
251 |
+
# Does interpreter exist?
|
252 |
+
if M_INTERPRETER is None:
|
253 |
+
loadMetaModel()
|
254 |
+
|
255 |
+
# Prepare mdata as sample
|
256 |
+
sample = np.expand_dims(np.array([lat, lon, week], dtype="float32"), 0)
|
257 |
+
|
258 |
+
# Run inference
|
259 |
+
M_INTERPRETER.set_tensor(M_INPUT_LAYER_INDEX, sample)
|
260 |
+
M_INTERPRETER.invoke()
|
261 |
+
|
262 |
+
return M_INTERPRETER.get_tensor(M_OUTPUT_LAYER_INDEX)[0]
|
263 |
+
|
264 |
+
|
265 |
+
def explore(lat: float, lon: float, week: int):
|
266 |
+
"""Predicts the species list.
|
267 |
+
|
268 |
+
Predicts the species list based on the coordinates and week of year.
|
269 |
+
|
270 |
+
Args:
|
271 |
+
lat: The latitude.
|
272 |
+
lon: The longitude.
|
273 |
+
week: The week of the year [1-48]. Use -1 for yearlong.
|
274 |
+
|
275 |
+
Returns:
|
276 |
+
A sorted list of tuples with the score and the species.
|
277 |
+
"""
|
278 |
+
# Make filter prediction
|
279 |
+
l_filter = predictFilter(lat, lon, week)
|
280 |
+
|
281 |
+
# Apply threshold
|
282 |
+
l_filter = np.where(l_filter >= cfg.LOCATION_FILTER_THRESHOLD, l_filter, 0)
|
283 |
+
|
284 |
+
# Zip with labels
|
285 |
+
l_filter = list(zip(l_filter, cfg.LABELS))
|
286 |
+
|
287 |
+
# Sort by filter value
|
288 |
+
l_filter = sorted(l_filter, key=lambda x: x[0], reverse=True)
|
289 |
+
|
290 |
+
return l_filter
|
291 |
+
|
292 |
+
|
293 |
+
def flat_sigmoid(x, sensitivity=-1):
|
294 |
+
return 1 / (1.0 + np.exp(sensitivity * np.clip(x, -15, 15)))
|
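Callers pass the sensitivity in negated (see predict() in the analyzer), so more negative values steepen the curve; two illustrative points for a raw logit of 2.0:

import numpy as np
print(1 / (1.0 + np.exp(-1.0 * 2.0)))  # ~0.881 with the default sensitivity of -1
print(1 / (1.0 + np.exp(-1.5 * 2.0)))  # ~0.953, a sensitivity of -1.5 boosts confident logits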
295 |
+
|
296 |
+
|
297 |
+
def predict(sample):
|
298 |
+
"""Uses the main net to predict a sample.
|
299 |
+
|
300 |
+
Args:
|
301 |
+
sample: Audio sample.
|
302 |
+
|
303 |
+
Returns:
|
304 |
+
The prediction scores for the sample.
|
305 |
+
"""
|
306 |
+
# Has custom classifier?
|
307 |
+
if cfg.CUSTOM_CLASSIFIER is not None:
|
308 |
+
return predictWithCustomClassifier(sample)
|
309 |
+
|
310 |
+
global INTERPRETER
|
311 |
+
|
312 |
+
# Does interpreter or keras model exist?
|
313 |
+
if INTERPRETER is None and PBMODEL is None:
|
314 |
+
loadModel()
|
315 |
+
|
316 |
+
if PBMODEL is None:
|
317 |
+
# Reshape input tensor
|
318 |
+
INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
|
319 |
+
INTERPRETER.allocate_tensors()
|
320 |
+
|
321 |
+
# Make a prediction (Audio only for now)
|
322 |
+
INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
|
323 |
+
INTERPRETER.invoke()
|
324 |
+
prediction = INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
|
325 |
+
|
326 |
+
return prediction
|
327 |
+
|
328 |
+
else:
|
329 |
+
# Make a prediction (Audio only for now)
|
330 |
+
prediction = PBMODEL.predict(sample)
|
331 |
+
|
332 |
+
return prediction
|
333 |
+
|
334 |
+
|
335 |
+
def predictWithCustomClassifier(sample):
|
336 |
+
"""Uses the custom classifier to make a prediction.
|
337 |
+
|
338 |
+
Args:
|
339 |
+
sample: Audio sample.
|
340 |
+
|
341 |
+
Returns:
|
342 |
+
The prediction scores for the sample.
|
343 |
+
"""
|
344 |
+
global C_INTERPRETER
|
345 |
+
|
346 |
+
# Does interpreter exist?
|
347 |
+
if C_INTERPRETER is None:
|
348 |
+
loadCustomClassifier()
|
349 |
+
|
350 |
+
# Get embeddings
|
351 |
+
feature_vector = embeddings(sample)
|
352 |
+
|
353 |
+
# Reshape input tensor
|
354 |
+
C_INTERPRETER.resize_tensor_input(C_INPUT_LAYER_INDEX, [len(feature_vector), *feature_vector[0].shape])
|
355 |
+
C_INTERPRETER.allocate_tensors()
|
356 |
+
|
357 |
+
# Make a prediction
|
358 |
+
C_INTERPRETER.set_tensor(C_INPUT_LAYER_INDEX, np.array(feature_vector, dtype="float32"))
|
359 |
+
C_INTERPRETER.invoke()
|
360 |
+
prediction = C_INTERPRETER.get_tensor(C_OUTPUT_LAYER_INDEX)
|
361 |
+
|
362 |
+
return prediction
|
363 |
+
|
364 |
+
|
365 |
+
def embeddings(sample):
|
366 |
+
"""Extracts the embeddings for a sample.
|
367 |
+
|
368 |
+
Args:
|
369 |
+
sample: Audio samples.
|
370 |
+
|
371 |
+
Returns:
|
372 |
+
The embeddings.
|
373 |
+
"""
|
374 |
+
global INTERPRETER
|
375 |
+
|
376 |
+
# Does interpreter exist?
|
377 |
+
if INTERPRETER is None:
|
378 |
+
loadModel(False)
|
379 |
+
|
380 |
+
# Reshape input tensor
|
381 |
+
INTERPRETER.resize_tensor_input(INPUT_LAYER_INDEX, [len(sample), *sample[0].shape])
|
382 |
+
INTERPRETER.allocate_tensors()
|
383 |
+
|
384 |
+
# Extract feature embeddings
|
385 |
+
INTERPRETER.set_tensor(INPUT_LAYER_INDEX, np.array(sample, dtype="float32"))
|
386 |
+
INTERPRETER.invoke()
|
387 |
+
features = INTERPRETER.get_tensor(OUTPUT_LAYER_INDEX)
|
388 |
+
|
389 |
+
return features
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
1 |
+
bottle==0.12.25
|
2 |
+
gradio==3.41.0
|
3 |
+
librosa==0.10.1
|
4 |
+
matplotlib==3.5.3
|
5 |
+
numpy==1.24.3
|
6 |
+
pyinstaller==5.13.0
|
7 |
+
pywebview==4.2.2
|
8 |
+
Requests==2.31.0
|
9 |
+
soundfile==0.12.1
|
10 |
+
tensorflow_macos==2.13.0
|
11 |
+
tflite_runtime==2.13.0
|
segments.py
ADDED
@@ -0,0 +1,305 @@
1 |
+
"""Extract segments from audio files based on BirdNET detections.
|
2 |
+
|
3 |
+
Can be used to save the segments of the audio files for each detection.
|
4 |
+
"""
|
5 |
+
import argparse
|
6 |
+
import os
|
7 |
+
from multiprocessing import Pool
|
8 |
+
|
9 |
+
import numpy as np
|
10 |
+
|
11 |
+
import audio
|
12 |
+
import config as cfg
|
13 |
+
import utils
|
14 |
+
|
15 |
+
# Set numpy random seed
|
16 |
+
np.random.seed(cfg.RANDOM_SEED)
|
17 |
+
|
18 |
+
|
19 |
+
def detectRType(line: str):
|
20 |
+
"""Detects the type of result file.
|
21 |
+
|
22 |
+
Args:
|
23 |
+
line: First line of text.
|
24 |
+
|
25 |
+
Returns:
|
26 |
+
Either "table", "r", "kaleidoscope", "csv" or "audacity".
|
27 |
+
"""
|
28 |
+
if line.lower().startswith("selection"):
|
29 |
+
return "table"
|
30 |
+
elif line.lower().startswith("filepath"):
|
31 |
+
return "r"
|
32 |
+
elif line.lower().startswith("indir"):
|
33 |
+
return "kaleidoscope"
|
34 |
+
elif line.lower().startswith("start (s)"):
|
35 |
+
return "csv"
|
36 |
+
else:
|
37 |
+
return "audacity"
|
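A quick sanity check of the header sniffing; the first-line strings are illustrative fragments of the formats written by the analyzer:

print(detectRType("Selection\tView\tChannel\t..."))  # -> "table"
print(detectRType("filepath,start,end,..."))         # -> "r"
print(detectRType("Start (s),End (s),..."))          # -> "csv"
print(detectRType("0.0\t1.0\tMyotis, bat"))          # -> "audacity" (fallback)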
38 |
+
|
39 |
+
|
40 |
+
def parseFolders(apath: str, rpath: str, allowed_result_filetypes: list[str] = ["txt", "csv"]) -> list[dict]:
|
41 |
+
"""Read audio and result files.
|
42 |
+
|
43 |
+
Reads all audio files and BirdNET output inside directory recursively.
|
44 |
+
|
45 |
+
Args:
|
46 |
+
apath: Path to search for audio files.
|
47 |
+
rpath: Path to search for result files.
|
48 |
+
allowed_result_filetypes: List of extensions for the result files.
|
49 |
+
|
50 |
+
Returns:
|
51 |
+
A list of {"audio": path_to_audio, "result": path_to_result }.
|
52 |
+
"""
|
53 |
+
data = {}
|
54 |
+
apath = apath.replace("/", os.sep).replace("\\", os.sep)
|
55 |
+
rpath = rpath.replace("/", os.sep).replace("\\", os.sep)
|
56 |
+
|
57 |
+
# Get all audio files
|
58 |
+
for root, _, files in os.walk(apath):
|
59 |
+
for f in files:
|
60 |
+
if f.rsplit(".", 1)[-1].lower() in cfg.ALLOWED_FILETYPES:
|
61 |
+
data[f.rsplit(".", 1)[0]] = {"audio": os.path.join(root, f), "result": ""}
|
62 |
+
|
63 |
+
# Get all result files
|
64 |
+
for root, _, files in os.walk(rpath):
|
65 |
+
for f in files:
|
66 |
+
if f.rsplit(".", 1)[-1] in allowed_result_filetypes and ".bat." in f:
|
67 |
+
data[f.split(".bat.", 1)[0]]["result"] = os.path.join(root, f)
|
68 |
+
|
69 |
+
# Convert to list
|
70 |
+
flist = [f for f in data.values() if f["result"]]
|
71 |
+
|
72 |
+
print(f"Found {len(flist)} audio files with valid result file.")
|
73 |
+
|
74 |
+
return flist
|
75 |
+
|
76 |
+
|
77 |
+
def parseFiles(flist: list[dict], max_segments=100):
|
78 |
+
"""Extracts the segments for all files.
|
79 |
+
|
80 |
+
Args:
|
81 |
+
flist: List of dict with {"audio": path_to_audio, "result": path_to_result }.
|
82 |
+
max_segments: Number of segments per species.
|
83 |
+
|
84 |
+
Returns:
|
85 |
+
TODO @kahst
|
86 |
+
"""
|
87 |
+
species_segments: dict[str, list] = {}
|
88 |
+
|
89 |
+
for f in flist:
|
90 |
+
# Paths
|
91 |
+
afile = f["audio"]
|
92 |
+
rfile = f["result"]
|
93 |
+
|
94 |
+
# Get all segments for result file
|
95 |
+
segments = findSegments(afile, rfile)
|
96 |
+
|
97 |
+
# Parse segments by species
|
98 |
+
for s in segments:
|
99 |
+
if s["species"] not in species_segments:
|
100 |
+
species_segments[s["species"]] = []
|
101 |
+
|
102 |
+
species_segments[s["species"]].append(s)
|
103 |
+
|
104 |
+
# Shuffle segments for each species and limit to max_segments
|
105 |
+
for s in species_segments:
|
106 |
+
np.random.shuffle(species_segments[s])
|
107 |
+
species_segments[s] = species_segments[s][:max_segments]
|
108 |
+
|
109 |
+
# Make dict of segments per audio file
|
110 |
+
segments: dict[str, list] = {}
|
111 |
+
seg_cnt = 0
|
112 |
+
|
113 |
+
for s in species_segments:
|
114 |
+
for seg in species_segments[s]:
|
115 |
+
if seg["audio"] not in segments:
|
116 |
+
segments[seg["audio"]] = []
|
117 |
+
|
118 |
+
segments[seg["audio"]].append(seg)
|
119 |
+
seg_cnt += 1
|
120 |
+
|
121 |
+
print(f"Found {seg_cnt} segments in {len(segments)} audio files.")
|
122 |
+
|
123 |
+
# Convert to list
|
124 |
+
flist = [tuple(e) for e in segments.items()]
|
125 |
+
|
126 |
+
return flist
|
127 |
+
|
128 |
+
|
129 |
+
def findSegments(afile: str, rfile: str):
|
130 |
+
"""Extracts the segments for an audio file from the results file
|
131 |
+
|
132 |
+
Args:
|
133 |
+
afile: Path to the audio file.
|
134 |
+
rfile: Path to the result file.
|
135 |
+
|
136 |
+
Returns:
|
137 |
+
A list of dicts in the form of
|
138 |
+
{"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence}
|
139 |
+
"""
|
140 |
+
segments: list[dict] = []
|
141 |
+
|
142 |
+
# Open and parse result file
|
143 |
+
lines = utils.readLines(rfile)
|
144 |
+
|
145 |
+
# Auto-detect result type
|
146 |
+
rtype = detectRType(lines[0])
|
147 |
+
|
148 |
+
# Get start and end times based on rtype
|
149 |
+
confidence = 0
|
150 |
+
start = end = 0.0
|
151 |
+
species = ""
|
152 |
+
|
153 |
+
for i, line in enumerate(lines):
|
154 |
+
if rtype == "table" and i > 0:
|
155 |
+
d = line.split("\t")
|
156 |
+
start = float(d[3])
|
157 |
+
end = float(d[4])
|
158 |
+
species = d[-2]
|
159 |
+
confidence = float(d[-1])
|
160 |
+
|
161 |
+
elif rtype == "audacity":
|
162 |
+
d = line.split("\t")
|
163 |
+
start = float(d[0])
|
164 |
+
end = float(d[1])
|
165 |
+
species = d[2].split(", ")[1]
|
166 |
+
confidence = float(d[-1])
|
167 |
+
|
168 |
+
elif rtype == "r" and i > 0:
|
169 |
+
d = line.split(",")
|
170 |
+
start = float(d[1])
|
171 |
+
end = float(d[2])
|
172 |
+
species = d[4]
|
173 |
+
confidence = float(d[5])
|
174 |
+
|
175 |
+
elif rtype == "kaleidoscope" and i > 0:
|
176 |
+
d = line.split(",")
|
177 |
+
start = float(d[3])
|
178 |
+
end = float(d[4]) + start
|
179 |
+
species = d[5]
|
180 |
+
confidence = float(d[7])
|
181 |
+
|
182 |
+
elif rtype == "csv" and i > 0:
|
183 |
+
d = line.split(",")
|
184 |
+
start = float(d[0])
|
185 |
+
end = float(d[1])
|
186 |
+
species = d[3]
|
187 |
+
confidence = float(d[4])
|
188 |
+
|
189 |
+
# Check if confidence is high enough
|
190 |
+
if confidence >= cfg.MIN_CONFIDENCE:
|
191 |
+
segments.append({"audio": afile, "start": start, "end": end, "species": species, "confidence": confidence})
|
192 |
+
|
193 |
+
return segments


def extractSegments(item: tuple[tuple[str, list[dict]], float, dict[str, object]]):
    """Saves each segment separately.

    Creates an audio file for each species segment.

    Args:
        item: A tuple that contains ((audio file path, segments), segment length, config)
    """
    # Paths and config
    afile = item[0][0]
    segments = item[0][1]
    seg_length = item[1]
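    # Re-apply the config in this worker; processes started without fork()
    # (e.g. on Windows) do not inherit the parent's module-level config.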
    cfg.set_config(item[2])

    # Status
    print(f"Extracting segments from {afile}")

    try:
        # Open audio file
        sig, _ = audio.openAudioFile(afile, cfg.SAMPLE_RATE)
    except Exception as ex:
        print(f"Error: Cannot open audio file {afile}", flush=True)
        utils.writeErrorLog(ex)

        return

    # Extract segments
    for seg_cnt, seg in enumerate(segments, 1):
        try:
            # Get start and end times
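            # Pad the detection symmetrically so the clip is seg_length seconds
            # long where possible, clamping at the signal bounds. Illustrative:
            # a 1 s detection with seg_length=3.0 gains 1 s on each side.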
            start = int(seg["start"] * cfg.SAMPLE_RATE)
            end = int(seg["end"] * cfg.SAMPLE_RATE)
            offset = ((seg_length * cfg.SAMPLE_RATE) - (end - start)) // 2
            start = max(0, start - offset)
            end = min(len(sig), end + offset)

            # Make sure segment is long enough
            if end > start:
                # Get segment raw audio from signal
                seg_sig = sig[int(start) : int(end)]

                # Make output path
                outpath = os.path.join(cfg.OUTPUT_PATH, seg["species"])
                os.makedirs(outpath, exist_ok=True)

                # Save segment
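                # File name pattern: <confidence>_<running index>_<source stem>.wav,
                # e.g. (hypothetical) "0.871_3_soundscape.wav"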
                seg_name = "{:.3f}_{}_{}.wav".format(
                    seg["confidence"], seg_cnt, seg["audio"].rsplit(os.sep, 1)[-1].rsplit(".", 1)[0]
                )
                seg_path = os.path.join(outpath, seg_name)
                audio.saveSignal(seg_sig, seg_path)

        except Exception as ex:
            # Write error log
            print(f"Error: Cannot extract segments from {afile}.", flush=True)
            utils.writeErrorLog(ex)
            return False

    return True
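
# Note: a single-file call (illustrative; "soundscape.wav" and seg_list are
# placeholders, and the config must be populated as in the __main__ block below)
# would look like:
# extractSegments((("soundscape.wav", seg_list), 3.0, cfg.get_config()))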


if __name__ == "__main__":
    # Parse arguments
    parser = argparse.ArgumentParser(description="Extract segments from audio files based on BirdNET detections.")
    parser.add_argument("--audio", default="put-your-files-here/", help="Path to folder containing audio files.")
    parser.add_argument("--results", default="put-your-files-here/results", help="Path to folder containing result files.")
    parser.add_argument("--o", default="put-your-files-here/segments/", help="Output folder path for extracted segments.")
    parser.add_argument(
        "--min_conf", type=float, default=0.1, help="Minimum confidence threshold. Values in [0.01, 0.99]. Defaults to 0.1."
    )
    parser.add_argument("--max_segments", type=int, default=100, help="Number of randomly extracted segments per species.")
    parser.add_argument(
        "--seg_length", type=float, default=3.0, help="Length of extracted segments in seconds. Defaults to 3.0."
    )
    parser.add_argument("--threads", type=int, default=4, help="Number of CPU threads.")

    args = parser.parse_args()

    # Parse audio and result folders
    cfg.FILE_LIST = parseFolders(args.audio, args.results)

    # Set output folder
    cfg.OUTPUT_PATH = args.o

    # Set number of threads
    cfg.CPU_THREADS = int(args.threads)

    # Set confidence threshold
    cfg.MIN_CONFIDENCE = max(0.01, min(0.99, float(args.min_conf)))

    # Parse file list and make list of segments
    cfg.FILE_LIST = parseFiles(cfg.FILE_LIST, max(1, int(args.max_segments)))

    # Add config items to each file list entry.
    # We have to do this for Windows, which does not
    # support fork() and thus requires each process to
    # carry its own copy of the config. USE LINUX!
    flist = [(entry, max(cfg.SIG_LENGTH, float(args.seg_length)), cfg.get_config()) for entry in cfg.FILE_LIST]

    # Extract segments
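    # A single thread runs in-process; otherwise a multiprocessing.Pool maps
    # one (file entry, seg_length, config) tuple to each worker call.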
    if cfg.CPU_THREADS < 2:
        for entry in flist:
            extractSegments(entry)
    else:
        with Pool(cfg.CPU_THREADS) as p:
            p.map(extractSegments, flist)

    # A few examples to test
    # python3 segments.py --audio example/ --results example/ --o example/segments/
    # python3 segments.py --audio example/ --results example/ --o example/segments/ --seg_length 5.0 --min_conf 0.1 --max_segments 100 --threads 4
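    # Another illustrative combination (same flags as defined above):
    # python3 segments.py --audio example/ --results example/ --o example/segments/ --min_conf 0.25 --threads 1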